def get_test_samples(self, num_images=10) -> typing.Iterable[typing.Mapping[str, Tensor]]:
    """Gets deterministic samples (every 5th index) from the test dataset."""
    samples = []
    for idx in range(0, num_images * 5, 5):
        sample, _ = self.dataset_test[idx]
        sample = dict_to_device(sample, self.flags.device)
        samples.append(sample)
    return samples
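# The dict_to_device helper used throughout this section is not shown here.
# A minimal sketch, assuming each sample is a flat mapping from modality name
# to Tensor (the real helper may handle nested structures):
import typing

import torch
from torch import Tensor


def dict_to_device(d: typing.Mapping[str, Tensor],
                   device: torch.device) -> typing.Dict[str, Tensor]:
    """Move every tensor in a modality dict to the given device."""
    return {key: tensor.to(device) for key, tensor in d.items()}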
def calc_coherence_random_gen(exp, mm_vae, iteration: int,
                              rand_coherences: Mapping[str, typing.List],
                              batch_d: dict) -> Mapping[str, typing.List]:
    args = exp.flags
    # generate random samples
    rand_gen = mm_vae.module.generate() if args.distributed else mm_vae.generate()
    rand_gen = dict_to_device(rand_gen, args.device)
    # classify the generated examples
    coherence_random = calculate_coherence(exp, rand_gen)
    for l_key in exp.labels:
        rand_coherences[l_key].append(coherence_random[l_key])
    if (exp.flags.batch_size * iteration) < exp.flags.num_samples_fid and args.save_figure:
        # save generated samples to dir_fid
        save_generated_samples(exp, rand_gen, iteration, batch_d)
    return rand_coherences
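# calculate_coherence is referenced above but not shown. A minimal sketch,
# assuming exp provides one pretrained binary classifier per modality
# (exp.clfs, outputting one score per label) and that a generation counts as
# coherent on a label when all modality classifiers agree on it; the names
# and shapes are illustrative assumptions, not the project's API:
def calculate_coherence(exp, samples: typing.Mapping[str, Tensor]) -> typing.Dict[str, float]:
    """Per-label fraction of generations on which all modality classifiers agree."""
    # per-modality predictions, each of shape (batch, num_labels)
    preds = {m_key: (exp.clfs[m_key](samples[m_key]) > 0.5).int()
             for m_key in samples}
    pred_stack = torch.stack(list(preds.values()), dim=0)  # (mods, batch, labels)
    coherences = {}
    for j, l_key in enumerate(exp.labels):
        # coherent where every modality matches the first modality's prediction
        agree = (pred_stack[:, :, j] == pred_stack[0, :, j]).all(dim=0)
        coherences[l_key] = agree.float().mean().item()
    return coherences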
def get_test_samples(self, num_images=10) -> typing.Iterable[typing.Mapping[str, Tensor]]:
    """Gets random samples for the cond. generation."""
    n_test = len(self.dataset_test)
    samples = []
    for _ in range(num_images):
        sample, _ = self.dataset_test[random.randint(0, n_test - 1)]
        sample = utils.dict_to_device(sample, self.flags.device)
        samples.append(sample)
    return samples
def get_test_samples(self, num_images=10) -> Iterable[Mapping[str, Tensor]]:
    """Gets random samples for the cond. generation.

    Assumes the test set contains at least one sample for every class
    index i in [0, num_images).
    """
    random.seed(42)
    n_test = len(self.dataset_test)
    samples = []
    for i in range(num_images):
        while True:
            # loop until a sample with label i is found
            ix = random.randint(0, n_test - 1)
            sample, target = self.dataset_test[ix]
            if target == i:
                samples.append(dict_to_device(sample, self.flags.device))
                break
    return samples
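# The while-loop above never terminates if no test sample carries label i.
# A bounded variant that fails loudly instead (a sketch under the same
# dataset interface; find_sample_with_label is a hypothetical helper):
import random


def find_sample_with_label(dataset, label: int, max_tries: int = 10_000):
    """Draw uniformly at random until a sample with `label` is found."""
    n = len(dataset)
    for _ in range(max_tries):
        ix = random.randint(0, n - 1)
        sample, target = dataset[ix]
        if target == label:
            return sample
    raise ValueError(f'no sample with label {label} found in {max_tries} draws')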
def estimate_likelihoods(exp):
    model = exp.mm_vae
    mods = exp.modalities
    d_loader = DataLoader(exp.dataset_test,
                          batch_size=exp.flags.batch_size,
                          shuffle=True,
                          num_workers=exp.flags.dataloader_workers,
                          drop_last=True)

    subsets = exp.subsets
    if '' in subsets:
        del subsets['']

    lhoods = {}
    for s_key in subsets:
        lhoods[s_key] = {m_key: [] for m_key in mods}
        lhoods[s_key]['joint'] = []

    for batch in d_loader:
        batch_d = dict_to_device(batch[0], exp.flags.device)
        _, joint_latent = model.inference(batch_d)

        for s_key, subset in subsets.items():
            ll_batch = calc_log_likelihood_batch(exp, joint_latent, s_key, subset,
                                                 batch_d, num_imp_samples=6)
            for m_key in ll_batch:
                lhoods[s_key][m_key].append(ll_batch[m_key].item())

    # average the per-batch estimates
    for s_key, lh_subset in lhoods.items():
        for m_key in lh_subset:
            lhoods[s_key][m_key] = np.mean(np.array(lh_subset[m_key]))

    return lhoods
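# calc_log_likelihood_batch is referenced above but not shown. Its core is an
# importance-sampled likelihood estimate, i.e. a log-mean-exp over the
# num_imp_samples importance weights. A minimal, numerically stable sketch
# (illustrative, not the project's API):
import math

import torch


def log_mean_exp(log_w: torch.Tensor, dim: int = 0) -> torch.Tensor:
    """log(1/K * sum_k exp(log_w_k)) along `dim`, computed stably."""
    k = log_w.size(dim)
    return torch.logsumexp(log_w, dim=dim) - math.log(k)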
def classify_generated_samples(args, d_loader, exp, mm_vae, mods, subsets):
    """Generates and classifies samples."""
    labels = exp.labels
    rand_coherences = {k: [] for k in labels}
    # all labels accumulated over batches:
    batch_labels = torch.Tensor()
    cond_gen_classified: Mapping[str, Mapping[str, Tensor]] = \
        init_twolevel_nested_dict(subsets, mods, init_val=torch.Tensor())

    for iteration, (batch_d, batch_l) in tqdm(enumerate(d_loader), total=len(d_loader)):
        batch_labels = torch.cat((batch_labels, batch_l), 0)
        batch_d = dict_to_device(batch_d, exp.flags.device)

        # evaluate random generation
        rand_coherences = calc_coherence_random_gen(exp, mm_vae, iteration,
                                                    rand_coherences, batch_d)

        # evaluate conditional generation: first generate the conditional
        # samples, then classify them and store the classifier predictions
        _, joint_latent = (mm_vae.module.inference(batch_d) if args.distributed
                           else mm_vae.inference(batch_d))
        cg: typing.Mapping[str, typing.Mapping[str, Tensor]] = (
            mm_vae.module.cond_generation(joint_latent) if args.distributed
            else mm_vae.cond_generation(joint_latent))

        # classify the cond. generated samples
        for subset, cond_val in cg.items():
            clf_cg: Mapping[str, Tensor] = classify_cond_gen_samples(exp, batch_l, cond_val)
            for mod in mods:
                cond_gen_classified[subset][mod] = torch.cat(
                    (cond_gen_classified[subset][mod], clf_cg[mod]), 0)
            if (exp.flags.batch_size * iteration) < exp.flags.num_samples_fid and exp.flags.save_figure:
                save_generated_samples_singlegroup(exp, iteration, subset, cond_val)

    return batch_labels, rand_coherences, cond_gen_classified
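# init_twolevel_nested_dict is referenced above but not shown. A plausible
# minimal implementation (an assumption, not the project's code); note that
# every leaf initially refers to the same init_val object, which is harmless
# here because torch.cat always allocates a new tensor:
def init_twolevel_nested_dict(keys_outer, keys_inner, init_val):
    """Build {outer: {inner: init_val}} for all key combinations."""
    return {k1: {k2: init_val for k2 in keys_inner} for k1 in keys_outer}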
def test_clf_lr_all_subsets(clf_lr: Mapping[str, Mapping[str, LogisticRegression]],
                            exp, which_lr: str):
    """Test the classifiers that were trained on latent representations.

    which_lr: either q0.mu or zk.
    """
    args = exp.flags
    mm_vae = exp.mm_vae
    mm_vae.eval()

    d_loader = DataLoader(exp.dataset_test,
                          batch_size=exp.flags.batch_size,
                          shuffle=False,
                          num_workers=exp.flags.dataloader_workers,
                          drop_last=True)

    training_steps = exp.flags.steps_per_training_epoch or len(d_loader)
    log.info(f'Creating {training_steps} batches of latent representations for classifier testing '
             f'with a batch_size of {exp.flags.batch_size}.')

    clf_predictions = {}
    batch_labels = torch.Tensor()
    for iteration, (batch_d, batch_l) in enumerate(d_loader):
        if iteration >= training_steps:
            break
        batch_labels = torch.cat((batch_labels, batch_l), 0)
        batch_d = dict_to_device(batch_d, exp.flags.device)
        _, joint_latent = (mm_vae.module.inference(batch_d) if args.distributed
                           else mm_vae.inference(batch_d))
        lr_data = joint_latent.get_lreval_data()
        data_test = lr_data[which_lr]
        clf_predictions_batch: Mapping[str, Mapping[str, np.ndarray]] = \
            classify_latent_representations(exp, clf_lr, data_test)

        for subset in data_test:
            # stack the per-label predictions into one (batch, num_labels) tensor
            clf_predictions_batch_subset = torch.cat(
                tuple(torch.tensor(clf_predictions_batch[label][subset]).unsqueeze(1)
                      for label in exp.labels), 1)
            if subset in clf_predictions:
                clf_predictions[subset] = torch.cat(
                    [clf_predictions[subset], clf_predictions_batch_subset], 0)
            else:
                clf_predictions[subset] = clf_predictions_batch_subset

    batch_labels = atleast_2d(batch_labels, -1)
    results = {}
    for subset in clf_predictions:
        # calculate metrics
        metrics = exp.metrics(clf_predictions[subset], batch_labels, str_labels=exp.labels)
        metrics_dict = metrics.evaluate()
        results[subset] = metrics.extract_values(metrics_dict)
    return results
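# classify_latent_representations is referenced above but not shown. A minimal
# sketch, assuming clf_lr is keyed label -> subset -> fitted LogisticRegression
# and data_test maps subset -> np.ndarray of latent vectors (the key order is
# an assumption inferred from the label-first indexing above):
import numpy as np


def classify_latent_representations(exp, clf_lr, data_test):
    """Predict every label for every subset's latent representations."""
    predictions = {label: {} for label in exp.labels}
    for label in exp.labels:
        for subset, latents in data_test.items():
            clf = clf_lr[label][subset]
            predictions[label][subset] = clf.predict(latents)  # np.ndarray
    return predictions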
def df_maker(epoch: int):
    config = json2dict(Path('conf.json'))
    methods = config['methods']
    data_dir = Path(__file__).parent.parent / 'data/thesis'
    experiment_uids_path = data_dir / 'experiment_uids.json'
    exp_uids = json2dict(experiment_uids_path)['polymnist']

    for method in methods:
        method_uids = exp_uids[method]['3_mods']
        d = {
            'missing_mod_scores': [],
            'reconstr_mod_scores': [],
            'random_prd_scores': []
        }
        for method_uid in method_uids:
            epoch_results_dir = data_dir / 'experiments' / 'polymnist' / method / method_uid / 'epoch_results'
            prd_dict = json2dict(epoch_results_dir / f'{epoch}.json')['test_results']['prd_scores']
            if prd_dict is None:
                # prd scores are missing for this epoch: recompute them
                tmpdirname = Path('/mnt/data/hendrik/mmnf_data/tempdir')
                tmpdirname.mkdir(parents=True, exist_ok=True)
                experiment_dir = data_dir / 'experiments' / 'polymnist' / method / method_uid
                exp = load_experiment(experiment_dir,
                                      _id=method_uid,
                                      epoch=epoch,
                                      add_args={'dir_gen_eval_fid': tmpdirname})
                args = exp.flags
                mm_vae = exp.mm_vae
                rand_gen = mm_vae.generate()
                d_loader = DataLoader(exp.dataset_test,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=exp.flags.dataloader_workers,
                                      drop_last=True)
                for iteration, (batch_d, batch_l) in tqdm(enumerate(d_loader), total=len(d_loader)):
                    batch_d = dict_to_device(batch_d, exp.flags.device)
                    save_generated_samples(exp, rand_gen, iteration, batch_d)
                    _, joint_latent = mm_vae.inference(batch_d)
                    cg = mm_vae.cond_generation(joint_latent)
                    for subset, cond_val in cg.items():
                        save_generated_samples_singlegroup(exp, iteration, subset, cond_val)
                prd_dict = calc_prd_score(exp)
                # persist the recomputed scores back into the epoch results
                ep_res_dict = json2dict(epoch_results_dir / f'{epoch}.json')
                ep_res_dict['test_results']['prd_scores'] = prd_dict
                dict2json(out_path=epoch_results_dir / f'{epoch}.json', d=ep_res_dict)
                # remove the temp dir together with the generated samples in it
                # (requires `import shutil`)
                shutil.rmtree(tmpdirname)

            d['random_prd_scores'].append(
                np.mean([score for k, score in prd_dict.items() if k.startswith('random')]))
            prd_dict = {k: v for k, v in prd_dict.items() if not k.startswith('random')}
            d['missing_mod_scores'].append(np.mean(list(get_missing_mod_scores_prd(prd_dict))))
            d['reconstr_mod_scores'].append(np.mean(list(get_reconstr_mod_scores_prd(prd_dict))))

        yield {
            'Method': method,
            'Missing Mod': np.round(np.mean(d['missing_mod_scores']), 3),
            'Reconstruction': np.round(np.mean(d['reconstr_mod_scores']), 3),
            'Random': np.round(np.mean(d['random_prd_scores']), 3),
            'Missing Mod__STDEV': np.round(np.std(d['missing_mod_scores']), 3),
            'Reconstruction__STDEV': np.round(np.std(d['reconstr_mod_scores']), 3),
            'Random__STDEV': np.round(np.std(d['random_prd_scores']), 3),
        }
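# Hypothetical usage: materialise the per-method rows into a pandas DataFrame
# for one epoch (the pandas import and the epoch value are assumptions, not
# part of the code above):
import pandas as pd

df = pd.DataFrame(list(df_maker(epoch=99)))
print(df.to_string(index=False))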