Example #1
    def update_epoch(self, train_results: BaseBatchResults,
                     test_results: BaseTestResults, epoch: int,
                     epoch_time: float):
        self.epoch_time.update(epoch_time)

        # Linearly anneal beta from min_beta to max_beta over beta_warmup
        # epochs, starting at beta_start_epoch.
        self.beta = min(
            max(
                self.flags.min_beta,
                ((epoch - self.flags.beta_start_epoch) *
                 (self.flags.max_beta - self.flags.min_beta)) /
                max(self.flags.beta_warmup, 1.)
            ),
            self.flags.max_beta
        )
        log.info(f'beta = {self.beta}')

        self.maybe_send_to_db(train_results=train_results,
                              test_results=test_results,
                              epoch=epoch,
                              beta=self.beta)

        # save checkpoints every checkpoint_freq epochs and at the last epoch
        if (epoch + 1) % self.flags.checkpoint_freq == 0 or (
                epoch + 1) == self.flags.end_epoch:
            self.exp.mm_vae.save_networks(epoch)

        return self.beta
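A minimal sketch of the annealing schedule in isolation, assuming illustrative
flag values (min_beta, max_beta, beta_start_epoch and beta_warmup defaults are
hypothetical here):

def beta_schedule(epoch: int, min_beta: float = 0.0, max_beta: float = 1.0,
                  beta_start_epoch: int = 0, beta_warmup: int = 100) -> float:
    # Linear ramp from min_beta to max_beta over beta_warmup epochs,
    # clipped at both ends.
    ramp = ((epoch - beta_start_epoch) * (max_beta - min_beta)
            / max(beta_warmup, 1.))
    return min(max(min_beta, ramp), max_beta)

assert beta_schedule(0) == 0.0
assert beta_schedule(50) == 0.5
assert beta_schedule(200) == 1.0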
Example #2
def compress_experiment_run_dir(flags) -> None:
    """
    Move zipped experiment_dir_run in TMPDIR to experiment_dir.
    """
    dir_experiment = Path(
        json2dict(
            get_config_path(flags=flags))['dir_experiment']).expanduser()
    dir_experiment.mkdir(exist_ok=True)

    # zip dir_experiment_run
    dir_experiment_zipped = dir_experiment / flags.experiment_uid
    log.info(
        f'zipping {flags.dir_experiment_run} '
        f'to {dir_experiment_zipped.with_suffix(".zip")}.'
    )

    shutil.make_archive(dir_experiment_zipped,
                        'zip',
                        flags.dir_experiment_run,
                        verbose=True)

    assert dir_experiment_zipped.with_suffix('.zip').exists(), \
        f'{dir_experiment_zipped.with_suffix(".zip")} does not exist. ' \
        f'Zipping of dir_experiment_run failed.'
    # delete the uncompressed experiment dir
    shutil.rmtree(str(flags.dir_experiment_run))
Example #3
    def finalize(self, test_results: BaseTestResults, epoch: int, average_epoch_time):
        log.info('Finalizing.')
        # write results as json to experiment folder
        run_metadata = {'end_epoch': epoch, 'experiment_duration': time.time() - self.begin_time,
                        'mean_epoch_time': self.callback.epoch_time.get_average()}

        dict2json(self.flags.dir_experiment_run / 'results.json', test_results.__dict__ | run_metadata)

        pdf_path = None
        if self.flags.use_db == 1:
            self.exp.experiments_database.insert_dict(run_metadata)
            self.exp.experiments_database.save_networks_to_db(dir_checkpoints=self.flags.dir_checkpoints, epoch=epoch,
                                                              modalities=self.exp.mm_vae.modalities)
            # self.exp.experiments_database.upload_logfile(self.flags.log_file)
            self.exp.experiments_database.upload_tensorbardlogs(self.flags.dir_experiment_run / 'logs')

            # run jupyter notebook with visualisations
            pdf_path = run_notebook_convert(self.flags.dir_experiment_run)

        # send alert; pdf_path only exists when use_db is set
        if self.flags.norby and self.flags.dataset != 'toy' and pdf_path is not None:
            import ppb
            import norby
            expvis_url = ppb.upload(pdf_path, plain=True)
            self.exp.experiments_database.insert_dict({'expvis_url': expvis_url})
            norby.send_msg(f'Experiment {self.flags.experiment_uid} has finished. The experiment visualisation can be '
                           f'found here: {expvis_url}')

        if self.flags.log_file.exists():
            shutil.move(self.flags.log_file, self.flags.dir_experiment_run)
Example #4
    def __init__(self, dir_data: Path, transform=None, target_transform=None, num_modalities: int = 5):
        """
            Args:
                unimodal_datapaths (list): list of paths to weakly-supervised unimodal datasets with samples that
                    correspond by index. Therefore the numbers of samples of all datapaths should match.
                transform: tranforms on colored MNIST digits.
                target_transform: transforms on labels.
                num_modalities (int):  number of modalities.
        """
        super().__init__()

        if not dir_data.exists():
            log.info(f'data dir {dir_data} does not exist. Creating PolyMNIST dataset. This may take a while...')
            self.create_polymnist_dataset((dir_data / 'train'), Path(__file__).parent / 'polymnist_background_images',
                                          num_modalities, train=True)
            self.create_polymnist_dataset((dir_data / 'test'), Path(__file__).parent / 'polymnist_background_images',
                                          num_modalities, train=False)

        self.num_modalities = num_modalities
        self.unimodal_datapaths = sorted(glob.glob(str(dir_data / 'm*')))[:num_modalities]
        self.transform = transform
        self.target_transform = target_transform

        # save all paths to individual files
        self.file_paths = {dp: [] for dp in self.unimodal_datapaths}
        for dp in self.unimodal_datapaths:
            assert Path(dp).exists(), f'data path {dp} does not exist.'
            files = glob.glob(os.path.join(dp, "*.png"))
            assert files, f'No png file found under {dp}'
            self.file_paths[dp] = files
        # assert that each modality has the same number of images
        num_files = len(next(iter(self.file_paths.values())))
        for files in self.file_paths.values():
            assert len(files) == num_files, 'Number of samples must match across modalities.'
        self.num_files = num_files
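A hedged sketch of how an index-aligned lookup could work on top of these file
paths, written as a standalone helper. It assumes filenames sort identically
across modality directories and that the digit label is encoded in the
filename; both are assumptions for illustration, not taken from the source:

from pathlib import Path
from PIL import Image

def get_item(dataset, index):
    """Pair samples across modalities by index (sketch)."""
    images, targets = {}, {}
    for m, dp in enumerate(dataset.unimodal_datapaths):
        # Assumption: sorting aligns samples by index across modalities.
        file_path = sorted(dataset.file_paths[dp])[index]
        img = Image.open(file_path)
        # Assumption: the label is the last underscore-separated token of the
        # filename, e.g. '00042_7.png' -> 7.
        label = int(Path(file_path).stem.split('_')[-1])
        images[f'm{m}'] = dataset.transform(img) if dataset.transform else img
        targets[f'm{m}'] = label
    return images, targets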
Example #5
    def setup_leomed(self, flags):
        tmpdir = Path(os.getenv("TMPDIR"))

        # unzip polymnist dataset to tmpdir
        if flags.dataset == 'polymnist':
            polymnist_zip_path = Path(
                '/cluster/work/vogtlab/Projects/Polymnist/PolyMNIST.zip')
            out_dir = tmpdir

            log.info(
                f'Extracting data from {polymnist_zip_path} to {out_dir}.')
            unpack_zipfile(polymnist_zip_path, out_dir)

            flags.dir_data = out_dir / 'PolyMNIST'

            assert flags.dir_data.exists(), f'Data dir {flags.dir_data} does not exist.'

        # unzip celeba dataset to tmpdir
        elif flags.dataset == 'celeba':
            celeba_zip_path = Path(flags.dir_data).expanduser()
            out_dir = tmpdir

            log.info(f'Extracting data from {celeba_zip_path} to {out_dir}.')
            unpack_zipfile(celeba_zip_path, out_dir)

            flags.dir_data = out_dir / 'CelebA'

            assert flags.dir_data.exists(), \
                f'Data dir {flags.dir_data} does not exist in {list(out_dir.iterdir())}.'

        flags.dir_fid = tmpdir / 'fid'

        flags.dir_experiment = tmpdir

        return flags
Example #6
    def delete_one(self, _id: str):
        """
        Removes one document from the db.
        """
        log.info(f'Deleting document with _id: {_id}.')
        experiment = self.connect()
        experiment.delete_one({'_id': _id})
def upload_one(exp_path: Path):
    """
    Upload one experiment result to database together with the model checkpoints,
    the logfile and tensorboardlogs, then delete zipped experiment dir.
    """
    is_zip = exp_path.suffix == '.zip'
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname) / exp_path.stem
        tmpdir.mkdir()

        if is_zip:
            # unpack zip into tmpdir
            log.info(f'Unpacking {exp_path} to {tmpdir}.')
            with zipfile.ZipFile(exp_path) as z:
                z.extractall(tmpdir)
            exp_dir = tmpdir
        else:
            exp_dir = exp_path

        flags = torch.load(exp_dir / 'flags.rar')
        db = MongoDatabase(training=True, flags=flags)
        results = {'epoch_results': {}}

        epochs = sorted(int(epoch.stem) for epoch in (exp_dir / 'epoch_results').iterdir())

        for epoch in epochs:
            epoch_str = str(epoch)
            epoch_results = (exp_dir / 'epoch_results' / epoch_str).with_suffix('.json')
            results['epoch_results'][epoch_str] = json2dict(epoch_results)

        db.insert_dict(results)

        modalities = [mod_str for mod_str in results['epoch_results'][str(epochs[-1])]['train_results']['log_probs'] if
                      len(mod_str.split('_')) == 1]
        dir_checkpoints = exp_dir / 'checkpoints'
        db.save_networks_to_db(
            dir_checkpoints=dir_checkpoints,
            epoch=max(int(d.name) for d in dir_checkpoints.iterdir()),
            modalities=modalities,
        )

        db.upload_tensorbardlogs(exp_dir / 'logs')

        pdf_path = run_notebook_convert(exp_dir)
        expvis_url = ppb.upload(pdf_path, plain=True)
        db.insert_dict({'expvis_url': expvis_url})

        log_files = glob.glob(str(exp_dir) + '/*.log')
        if log_files:
            db.upload_logfile(Path(log_files[0]))

        send_msg(f'Uploading of experiment {flags.experiment_uid} has finished. The experiment visualisation can be '
                 f'found here: {expvis_url}'
                 )

    # delete exp_path
    if is_zip:
        exp_path.unlink()
    else:
        shutil.rmtree(exp_path)
Example #8
def get_hyperopt_score(test_results: BaseTestResults, use_zk: bool,
                       optuna_trial):
    """
    Sum over all metrics to get a score for the optimization of hyperparameters.
    """

    # score_gen_eval = np.mean([score for _, score in test_results.gen_eval.items()])
    score_gen_eval = np.mean(
        list(get_missing_mod_scores_gen_eval(test_results.gen_eval)))
    # score_prd = np.mean([score for _, score in test_results.prd_scores.items()])
    score_prd = np.mean(
        list(get_missing_mod_scores_prd(test_results.prd_scores)))

    # if use_zk:
    #     score_lr_eval = np.mean([score['accuracy'] for _, score in test_results.lr_eval_zk.items()])
    # else:
    #     score_lr_eval = np.mean([score['accuracy'] for _, score in test_results.lr_eval_q0.items()])

    # Add the metrics to optuna as constant "suggestions" so that they are
    # stored with the trial and can be retrieved from the database.
    optuna_trial.suggest_float("score_gen_eval", score_gen_eval,
                               score_gen_eval)
    optuna_trial.suggest_float("score_prd", score_prd, score_prd)
    # optuna_trial.suggest_float("score_lr_eval", score_lr_eval, score_lr_eval)

    # score = score_lr_eval / 0.9 + score_gen_eval / 0.55 + score_prd / 0.05
    score = score_gen_eval / 0.55 + score_prd / 0.05
    # score = score_lr_eval / 0.75 + score_gen_eval / 0.55 + score_prd / 0.1
    log.info(f'Current hyperopt score is {score}. '
             # f'score_lr_eval: {score_lr_eval}, '
             f'score_gen_eval: {score_gen_eval}, '
             f'score_prd: {score_prd}')
    return optuna_trial, score
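A numeric sketch of the weighting, with illustrative metric values; the
constants 0.55 and 0.05 act as rough reference scores so that both terms
contribute on a comparable scale:

score_gen_eval = 0.50  # hypothetical mean generation-coherence score
score_prd = 0.04       # hypothetical mean PRD score

score = score_gen_eval / 0.55 + score_prd / 0.05
print(round(score, 2))  # 1.71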
Example #9
def run_hyperopt_epochs(trainer: PolymnistTrainer) -> float:
    test_results = trainer.run_epochs()

    # clean experiment run dir
    shutil.rmtree(trainer.flags.dir_experiment_run)
    log.info(
        f'Finished hyperopt run with score: {test_results.hyperopt_score}.')
    return test_results.hyperopt_score
Example #10
def stdout_if_verbose(verbose: int, message, min_level: int):
    """
    verbose: current global verbose setting
    message: message to be sent to stdout
    level: minimum verbose level needed to send the message
    """
    if verbose >= min_level:
        log.info(message)
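A brief usage sketch (the values are illustrative):

stdout_if_verbose(verbose=2, message='loaded checkpoint', min_level=1)  # logged
stdout_if_verbose(verbose=0, message='debug details', min_level=2)      # suppressed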
Example #11
def run_one(exp_dir: str):
    exp_dir = Path(exp_dir).expanduser()
    log.info(
        f'Starting execution of experiment vis for experiment {exp_dir.name}')
    pdf_path = run_notebook_convert(exp_dir)
    expvis_url = ppb.upload(pdf_path, plain=True)
    log.info(f'Uploaded experiment vis to {expvis_url}')
    db = MongoDatabase(training=False, _id=exp_dir.name)
    db.insert_dict({'expvis_url': expvis_url})
Example #12
def get_ds_stats(args):
    path = Path(__file__).parent.parent / 'data/dataset_stats.json'
    if not path.exists():
        from mimic.dataio.find_dataset_stats import get_mean_std
        log.info(
            "Dataset statistics not found, need to create them. This may take a while."
        )
        get_mean_std(out_path=path, args=args)
    with open(path, 'r') as jsonfile:
        data = json.load(jsonfile)
    return data['PA_mean'], data['PA_std']
Example #13
    def set_rec_weights(self):
        """
        Sets the weights of the log probs for each modality.
        """
        log.info('setting rec_weights')

        return {
            'PA': self.flags.rec_weight_m1,
            'Lateral': self.flags.rec_weight_m2,
            'text': self.flags.rec_weight_m3
        }
Example #14
    def set_dataset(self):
        log.info('setting dataset')
        # use a dummy dataset for faster unittests
        if self.flags.dataset == 'toy':
            log.info('using testing dataset')
            self.flags.vocab_size = 3517
            d_train = Mimic_testing(self.flags)
            d_eval = Mimic_testing(self.flags)
        else:
            d_train = Mimic(self.flags, self.labels, split='train')
            d_eval = Mimic(self.flags, self.labels, split='eval')
        return d_train, d_eval
Example #15
    def load_networks_from_db(self, mmvae: BaseMMVae):
        log.info(f'Loading networks from database for model {mmvae}.')
        fs = self.connect_with_gridfs()

        with tempfile.TemporaryDirectory() as tmpdirname:
            tmpdirname = Path(tmpdirname)
            for mod_str in mmvae.modalities:
                for prefix in ['en', 'de']:
                    filename = tmpdirname / f"{prefix}coderM{mod_str}"
                    model_id = self.experiment_uid + f"__{prefix}coderM{mod_str}"
                    with open(filename, 'wb') as fileobject:
                        fileobject.write(fs.get(model_id).read())

            mmvae.load_networks(tmpdirname)
        return mmvae
Example #16
    def save_networks_to_db(self, dir_checkpoints: Path, epoch: int,
                            modalities):
        """
        Inspired from https://medium.com/naukri-engineering/way-to-store-large-deep-learning-models-in-production-ready-environments-d8a4c66cc04c
        There is probably a better way to store Tensors in MongoDB.
        """
        fs = self.connect_with_gridfs()
        checkpoint_dir = dir_checkpoints / str(epoch).zfill(4)
        fs_ids = [elem._id for elem in fs.find({})]

        for mod_str in modalities:
            for prefix in ['en', 'de']:
                filename = checkpoint_dir / f"{prefix}coderM{mod_str}"
                _id = self.experiment_uid + f"__{prefix}coderM{mod_str}"
                if _id not in fs_ids:
                    with io.FileIO(str(filename), 'r') as fileObject:
                        log.info(f'Saving checkpoint to db: {filename}')
                        fs.put(fileObject, filename=str(filename), _id=_id)
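A minimal sketch of the underlying GridFS round-trip, assuming a local MongoDB
instance; the database name and _id are hypothetical:

import gridfs
from pymongo import MongoClient

db = MongoClient()['test_db']
fs = gridfs.GridFS(db)

_id = 'my_experiment__encoderMm0'
fs_ids = [elem._id for elem in fs.find({})]
if _id not in fs_ids:
    # put() stores the bytes as a GridFS file under the given _id
    fs.put(b'<serialized network weights>', filename='encoderMm0', _id=_id)

weights = fs.get(_id).read()  # round-trip: returns the stored bytes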
Example #17
def upload_notebook_to_db(experiment_uid: str) -> None:
    """
    Run the experiment vis notebook and upload it with ppb to db.
    """
    import ppb

    with tempfile.TemporaryDirectory() as tmpdirname:
        dir_experiment_run = Path(tmpdirname) / experiment_uid
        dir_experiment_run.mkdir()

        db = MongoDatabase(training=False, _id=experiment_uid)
        dict2json(dir_experiment_run / 'flags.json', db.get_experiment_dict()['flags'])

        pdf_path = run_notebook_convert(dir_experiment_run=dir_experiment_run)

        expvis_url = ppb.upload(pdf_path, plain=True)
        log.info(f'Experiment_vis was uploaded to {expvis_url}')
        db.insert_dict({'expvis_url': expvis_url})
Example #18
    def __init__(self, args, alphabet, partition=0, transform=None):
        self.dir_dataset_base = args.dir_data

        if not self.dir_dataset_base.exists():
            log.info(
                f'Dataset not found under {self.dir_dataset_base}. Parent directory contains: '
                f'{list(self.dir_dataset_base.parent.iterdir())}')
            download_zip_from_url(
                url='https://www.dropbox.com/sh/lx8669lyok9ois6/AACCZqDiZuv0Q8RA3Qmwrwnca/celeba_data.zip?dl=1',
                dest_folder=Path(self.dir_dataset_base).parent,
                verbose=True)

        filename_text = self.dir_dataset_base / (
            'list_attr_text_' + str(args.len_sequence).zfill(3) + '_' +
            str(args.random_text_ordering) + '_' +
            str(args.random_text_startindex) + '_celeba.csv')
        filename_partition = os.path.join(self.dir_dataset_base,
                                          'list_eval_partition.csv')
        filename_attributes = os.path.join(self.dir_dataset_base,
                                           'list_attr_celeba.csv')

        df_text = pd.read_csv(filename_text)
        df_partition = pd.read_csv(filename_partition)
        df_attributes = pd.read_csv(filename_attributes)

        self.args = args
        self.img_dir = os.path.join(self.dir_dataset_base, 'img_align_celeba')
        self.txt_path = filename_text
        self.attributes_path = filename_attributes
        self.partition_path = filename_partition

        self.alphabet = alphabet
        self.img_names = df_text.loc[df_partition['partition'] == partition]['image_id'].values
        self.attributes = df_attributes.loc[df_partition['partition'] == partition]
        # at the moment, only blond_hair is used as labels
        self.labels = df_attributes.loc[df_partition['partition'] == partition].values
        self.y = df_text.loc[df_partition['partition'] == partition]['text'].values
        self.transform = transform
Example #19
def classify_cond_gen_samples(exp, labels: Tensor, cond_samples: typing.Mapping[str, Tensor]) \
        -> typing.Mapping[str, Tensor]:
    """
    Classifies for each modality all the conditionally generated samples.
    Returns a dict like the following:
    {'PA': tensor,
    'Lateral': tensor,
    'text': tensor}

    """
    clf_predictions = {mod: {} for mod in exp.modalities}
    for mod in exp.modalities:
        if mod in cond_samples:
            mod_cond_gen: Tensor = cond_samples[mod]
            mod_clf = exp.modalities[mod].clf
            # classify generated sample to evaluate coherence
            clf_predictions[mod] = mod_clf(mod_cond_gen).cpu()
        else:
            log.info(f'{mod} not present in cond_samples.')
    return clf_predictions
Example #20
    def set_modalities(self) -> typing.Mapping[str, BaseModality]:
        log.info('setting modalities')
        mods = {}
        for mod_str in self.flags.mods.split('_'):
            if mod_str == 'F':
                mod = MimicPA(self.flags, self.labels,
                              self.flags.rec_weight_m1, self.plot_img_size)
            elif mod_str == 'L':
                mod = MimicLateral(self.flags, self.labels,
                                   self.flags.rec_weight_m2,
                                   self.plot_img_size)
            elif mod_str == 'T':
                mod = MimicText(self.flags, self.labels,
                                self.flags.rec_weight_m3, self.plot_img_size,
                                self.dataset_train.report_findings_dataset.i2w)
            else:
                raise ValueError(f'Invalid mod_str {mod_str}. '
                                 'Choose between {F,L,T}.')
            mods[mod.name] = mod

        return mods
Example #21
    def upload_tensorbardlogs(self, tensorboard_logdir: Path) -> None:
        """zip tensorboard logs and save them to db."""
        fs = self.connect_with_gridfs()
        fs_ids = [elem._id for elem in fs.find({})]

        file_id = self.experiment_uid + "__tensorboard_logs"
        if file_id not in fs_ids:
            with tempfile.TemporaryDirectory() as tmpdirname:
                log.info(f'Zipping {file_id} to {tmpdirname}.')

                # zip_path instead of zipfile, to avoid shadowing the zipfile module
                zip_path = Path(tmpdirname) / tensorboard_logdir.name
                shutil.make_archive(zip_path,
                                    'zip',
                                    tensorboard_logdir,
                                    verbose=True)

                log.info('Uploading tensorboard logs to db.')
                with io.FileIO(str(zip_path.with_suffix('.zip')),
                               'r') as fileObject:
                    fs.put(fileObject,
                           filename=str(tensorboard_logdir.name),
                           _id=file_id)
Example #22
def get_lr_training_data(args, exp, mm_vae, subsets: List[str], train_loader,
                         num_training_samples_lr):
    """
    Get the latent embedding as the training data for the linear classifiers.
    """
    data_train = None
    all_labels = torch.Tensor()
    log.info(
        f"Creating {num_training_samples_lr} batches of the latent representations for the classifier."
    )
    for it, (batch_d, batch_l) in tqdm(enumerate(train_loader),
                                       total=len(train_loader),
                                       postfix='creating_train_lr'):
        """
        Constructs the training set (labels and inferred subsets) for the classifier training.
        """
        if it >= 1000 and it >= num_training_samples_lr:
            break
        batch_d = {k: v.to(exp.flags.device) for k, v in batch_d.items()}
        _, joint_latent = mm_vae.module.inference(
            batch_d) if args.distributed else mm_vae.inference(batch_d)
        joint_latent: Union[JointLatents, JointLatentsFoEM, JointLatentsFoJ,
                            JointLatentsFoS]

        all_labels = torch.cat((all_labels, batch_l), 0)

        lr_data = joint_latent.get_lreval_data()

        if data_train is None:
            data_train = lr_data
        else:
            for which_lr in lr_data:
                for s_key in lr_data[which_lr]:
                    data_train[which_lr][s_key] = torch.cat(
                        (data_train[which_lr][s_key],
                         lr_data[which_lr][s_key]), 0)

    return all_labels, data_train
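For orientation, a sketch of the nested structure this concatenation assumes
for lr_data. The outer keys 'q0' and 'zk' mirror the lr_eval_q0/lr_eval_zk
naming used elsewhere in these examples; the subset names and tensor shapes
are purely hypothetical:

import torch

lr_data = {
    'q0': {'m0': torch.zeros(64, 20), 'm0_m1': torch.zeros(64, 20)},
    'zk': {'m0': torch.zeros(64, 20), 'm0_m1': torch.zeros(64, 20)},
}
# data_train accumulates batches along dim 0, e.g.:
# data_train['q0']['m0'] = torch.cat((data_train['q0']['m0'], lr_data['q0']['m0']), 0)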
Example #23
def run_notebook_convert(dir_experiment_run: Path) -> Path:
    """
    Run and convert the notebook to html and pdf.
    """
    # copy the experiment_vis jupyter notebook to the experiment run dir
    notebook_path = Path(__file__).parent.parent / 'experiment_vis/experiment_vis.ipynb'
    dest_notebook_path = dir_experiment_run / 'experiment_vis.ipynb'
    shutil.copyfile(notebook_path, dest_notebook_path)

    log.info(f'Executing experiment vis notebook {dest_notebook_path}.')
    with open(dest_notebook_path) as f:
        nb = nbformat.read(f, as_version=4)
    ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
    ep.preprocess(nb, {'metadata': {'path': str(dest_notebook_path.parent)}})

    nbconvert_path = dest_notebook_path.with_suffix('.nbconvert.ipynb')

    with open(nbconvert_path, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

    log.info('Converting notebook to html.')
    html_path = nbconvert_path.with_suffix('.html')
    html_exporter = HTMLExporter()
    html_exporter.template_name = 'classic'
    (body, resources) = html_exporter.from_notebook_node(nb)
    with open(html_path, 'w') as f:
        f.write(body)

    log.info('Converting notebook to pdf.')
    pdf_path = nbconvert_path.with_suffix('.pdf')
    pdf_exporter = PDFExporter()
    pdf_exporter.template_name = 'classic'
    (body, resources) = pdf_exporter.from_notebook_node(nb)
    pdf_path.write_bytes(body)

    return pdf_path
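A brief usage sketch, with a hypothetical run directory:

pdf_path = run_notebook_convert(Path('~/experiments/run_2021').expanduser())
log.info(f'Experiment visualisation written to {pdf_path}')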
Example #24
from mmvae_hub import log
from mmvae_hub.celeba.CelebaTrainer import CelebaTrainer
from mmvae_hub.celeba.experiment import CelebaExperiment
from mmvae_hub.celeba.flags import parser, CelebaFlagsSetup
from mmvae_hub.leomed_utils.boilerplate import compress_experiment_run_dir
from mmvae_hub.utils.setup.flags_utils import get_config_path
# maybe_norby is used below; assuming it comes from the norby package.
from norby.utils import maybe_norby

DATASET = 'celeba'

if __name__ == '__main__':
    flags = parser.parse_args()
    flags_setup = CelebaFlagsSetup(
        get_config_path(dataset=DATASET, flags=flags))
    flags = flags_setup.setup(flags, additional_args={'dataset': DATASET})

    with maybe_norby(flags.norby,
                     f'Starting Experiment {flags.experiment_uid}.',
                     f'Experiment {flags.experiment_uid} finished.'):
        mst = CelebaExperiment(flags)
        mst.set_optimizer()
        trainer = CelebaTrainer(mst)
        trainer.run_epochs()

    log.info('Done.')
    # move zipped experiment_dir_run in TMPDIR to experiment_dir
    if flags.leomed:
        compress_experiment_run_dir(flags)
Example #25
    def insert_dict(self, d: dict):
        log.info('Inserting dict into database.')
        experiments = self.connect()
        experiments.find_one_and_update({'_id': self.experiment_uid},
                                        {"$set": d})
Example #26
    def test(self, epoch, last_epoch: bool) -> BaseTestResults:
        with torch.no_grad():
            self.exp.set_eval_mode()
            model = self.exp.mm_vae

            d_loader, training_steps, average_meters = self.setup_phase('test')

            for iteration, (batch_d, _) in enumerate(at_most_n(d_loader, training_steps)):
                batch_d = model.batch_to_device(batch_d)
                forward_results: BaseForwardResults = model(batch_d)

                # calculate the loss
                total_loss, joint_divergence, log_probs, klds = model.calculate_loss(forward_results, batch_d)
                results = {**forward_results.__dict__, 'joint_divergence': joint_divergence}

                batch_results = {
                    'total_loss': total_loss.item(),
                    'klds': get_items_from_dict(klds),
                    'log_probs': get_items_from_dict(log_probs),
                    'joint_divergence': results['joint_divergence'].item(),
                    # 'latents': forward_results.enc_mods,
                    # 'joint_latents': forward_results.joint_latents
                }

                for key in batch_results:
                    average_meters[key].update(batch_results[key])

            averages = {k: v.get_average() for k, v in average_meters.items()}

            self.tb_logger.write_testing_logs(**{k: v for k, v in averages.items() if k != 'joint_latents'})

            test_results = BaseTestResults(joint_div=averages['joint_divergence'], **averages)

            # plot generation is temporarily disabled
            # log.info('generating plots')
            # plots = generate_plots(self.exp, epoch)
            # self.tb_logger.write_plots(plots, epoch)

            if self.flags.eval_lr:
                log.info('evaluation of latent representation')
                # train linear classifiers
                clf_lr_q0, clf_lr_zk = train_clf_lr_all_subsets(self.exp)

                # test linear classifiers
                # methods where the flow is applied on each modality don't have a q0.
                lr_eval_q0 = test_clf_lr_all_subsets(clf_lr_q0, self.exp, which_lr='q0') \
                    if clf_lr_q0 else None
                lr_eval_zk = test_clf_lr_all_subsets(clf_lr_zk, self.exp, which_lr='zk') \
                    if clf_lr_zk else None

                # log results
                lr_eval_results = {'q0': lr_eval_q0, 'zk': lr_eval_zk}
                log.info(f'Lr eval results: {lr_eval_results}')
                self.tb_logger.write_lr_eval(lr_eval_results)
                test_results.lr_eval_q0 = lr_eval_q0
                test_results.lr_eval_zk = lr_eval_zk

            if self.flags.use_clf:
                log.info('test generation')
                gen_eval = test_generation(self.exp)
                log.info(f'Gen eval results: {gen_eval}')
                self.tb_logger.write_coherence_logs(gen_eval)
                test_results.gen_eval = flatten_cond_gen_values(gen_eval)

            if self.flags.calc_nll:
                log.info('estimating likelihoods')
                lhoods = estimate_likelihoods(self.exp)
                self.tb_logger.write_lhood_logs(lhoods)
                test_results.lhoods = lhoods

            if self.flags.calc_prd and (((epoch + 1) % self.flags.eval_freq_fid == 0) or last_epoch):
                log.info('calculating PRD score')
                prd_scores = calc_prd_score(self.exp)
                self.tb_logger.write_prd_scores(prd_scores)
                test_results.prd_scores = prd_scores
        return test_results
Example #27
def test_clf_lr_all_subsets(clf_lr: Mapping[str, Mapping[str,
                                                         LogisticRegression]],
                            exp, which_lr: str):
    """
    Test the classifiers that were trained on latent representations.

    which_lr: either q0.mu or zk.
    """
    args = exp.flags
    mm_vae = exp.mm_vae
    mm_vae.eval()

    d_loader = DataLoader(exp.dataset_test,
                          batch_size=exp.flags.batch_size,
                          shuffle=False,
                          num_workers=exp.flags.dataloader_workers,
                          drop_last=True)

    training_steps = exp.flags.steps_per_training_epoch or len(d_loader)
    log.info(
        f'Creating {training_steps} batches of latent representations for classifier testing '
        f'with a batch_size of {exp.flags.batch_size}.')

    # clf_predictions = init_clf_predictions(subsets, which_lr, mm_vae)
    clf_predictions = {}
    batch_labels = torch.Tensor()

    for iteration, (batch_d, batch_l) in enumerate(d_loader):
        if iteration > training_steps:
            break
        batch_labels = torch.cat((batch_labels, batch_l), 0)

        batch_d = dict_to_device(batch_d, exp.flags.device)

        _, joint_latent = mm_vae.module.inference(
            batch_d) if args.distributed else mm_vae.inference(batch_d)

        lr_data = joint_latent.get_lreval_data()
        data_test = lr_data[which_lr]

        clf_predictions_batch = classify_latent_representations(
            exp, clf_lr, data_test)
        clf_predictions_batch: Mapping[str, Mapping[str, np.ndarray]]

        for subset in data_test:
            clf_predictions_batch_subset = torch.cat(
                tuple(
                    torch.tensor(clf_predictions_batch[label]
                                 [subset]).unsqueeze(1)
                    for label in exp.labels), 1)

            if subset in clf_predictions:
                clf_predictions[subset] = torch.cat(
                    [clf_predictions[subset], clf_predictions_batch_subset], 0)
            else:
                clf_predictions[subset] = clf_predictions_batch_subset

    batch_labels = atleast_2d(batch_labels, -1)
    results = {}
    for subset in clf_predictions:
        # calculate metrics
        metrics = exp.metrics(clf_predictions[subset],
                              batch_labels,
                              str_labels=exp.labels)
        metrics_dict = metrics.evaluate()
        results[subset] = metrics.extract_values(metrics_dict)

    return results
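For context, a minimal sketch of the kind of classifier these tests assume
upstream: a scikit-learn LogisticRegression trained on latent embeddings.
Shapes and names are illustrative only:

import numpy as np
from sklearn.linear_model import LogisticRegression

# Hypothetical latent embeddings for one subset and one binary label.
latents_train = np.random.randn(500, 20)       # 500 samples, 20-dim latents
labels_train = np.random.randint(0, 2, 500)    # binary labels

clf = LogisticRegression()
clf.fit(latents_train, labels_train)

latents_test = np.random.randn(64, 20)
predictions = clf.predict(latents_test)        # one prediction per sample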
Example #28
def create_dir_structure(flags: argparse.Namespace,
                         train: bool = True) -> argparse.Namespace:
    """
    Create the following directories under flags.dir_experiment:
    experiment_uid, checkpoints, logs, generation_evaluation, inference, fid_eval, plots.

    Under flags.dir_experiment/plots, create:
    cond_gen, random_samples, swapping.
    """
    experiment_uid = flags.experiment_uid
    if train:
        flags.dir_experiment_run = flags.dir_experiment / experiment_uid
        flags.str_experiment = experiment_uid
    else:
        flags.dir_experiment_run = (flags.dir_experiment).expanduser()

    log.info(f'dir_experiment_run: {flags.dir_experiment_run}')
    if train:
        create_dir(flags.dir_experiment_run)

    flags.dir_checkpoints = Path(flags.dir_experiment_run) / 'checkpoints'
    if train:
        create_dir(os.path.expanduser(flags.dir_checkpoints))

    flags.dir_logs = os.path.join(os.path.expanduser(flags.dir_experiment_run),
                                  'logs')
    if train:
        create_dir(flags.dir_logs)

    flags.dir_gen_eval = os.path.join(
        os.path.expanduser(flags.dir_experiment_run), 'generation_evaluation')
    if train:
        create_dir(flags.dir_gen_eval)

    flags.dir_inference = os.path.join(
        os.path.expanduser(flags.dir_experiment_run), 'inference')
    if train:
        create_dir(flags.dir_inference)

    if not train:
        flags.dir_fid = os.path.join(flags.dir_experiment_run, 'fid_eval')
        if not os.path.exists(flags.dir_fid):
            os.makedirs(flags.dir_fid)
    flags.dir_gen_eval_fid = os.path.join(flags.dir_fid, experiment_uid)
    log.info(f'dir_gen_eval_fid: {flags.dir_gen_eval_fid}')
    create_dir(flags.dir_gen_eval_fid)

    flags.dir_plots = os.path.join(flags.dir_experiment_run, 'plots')
    if train:
        create_dir(flags.dir_plots)
    flags.dir_swapping = os.path.join(flags.dir_plots, 'swapping')
    if train:
        create_dir(flags.dir_swapping)

    flags.dir_random_samples = os.path.join(flags.dir_plots, 'random_samples')
    if train:
        create_dir(flags.dir_random_samples)

    flags.dir_cond_gen = os.path.join(flags.dir_plots, 'cond_gen')
    if train:
        create_dir(flags.dir_cond_gen)

    return flags