def load_clfs(args) -> dict:
    clfs = {}
    for modality in MODALITIES:
        log.info(f'Loading {modality} clf.')
        if modality in ['PA', 'Lateral']:
            clf = ClfImg(args, get_labels(args.binary_labels)) if args.img_clf_type == 'resnet' \
                else CheXNet(len(get_labels(args.binary_labels)))
            dir_clf = f'{args.dir_clf}/Mimic{args.img_size}_{args.img_clf_type}' \
                      f'{"_bin_label" if args.binary_labels else ""}'
            clf_path = Path(glob.glob(f'{dir_clf}/clf_{MOD_MAPPING[modality]}*')[0])
        elif modality == 'text':
            dir_clf = args.dir_clf
            clf = ClfText(args, get_labels(args.binary_labels))
            if args.binary_labels:
                clf_path = [f for f in
                            glob.glob(f'{dir_clf}/clf_{MOD_MAPPING[modality]}vocabsize_{args.vocab_size}*')
                            if 'bin_label' in f][0]
            else:
                clf_path = [f for f in
                            glob.glob(f'{dir_clf}/clf_{MOD_MAPPING[modality]}vocabsize_{args.vocab_size}*')
                            if 'bin_label' not in f][0]
        else:
            raise NotImplementedError
        log.info(f'Loading state dict from {clf_path}.')
        clf.load_state_dict(torch.load(clf_path))
        clfs[modality] = clf.to(args.device)
    return clfs
def run(self): log.info(f"starting producer, listening on port {self.port}") context = zmq.Context() with context.socket(PUSH) as self.socket: self.socket.bind(f"tcp://*:{self.port}") while self.producing: message = { "text": "hello world", "value": random(), } log.debug("producing message %s" % message) try: self.socket.send_json(message) sleep(1) except ZMQError as e: if e.errno == zmq.ENOTSOCK: log.debug("socket has been closed, terminating") else: log.error(e) log.info("terminating") context.term()
def write_to_config(values: dict):
    config = get_config()
    for k, v in values.items():
        config[k] = v
    config_path = Path(os.getcwd()) / f'configs/{get_config_path()}.json'
    log.info(f'Writing {values} to config {config_path}.')
    with open(config_path, 'w') as json_file:
        json.dump(config, json_file, indent=4)
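# A minimal self-contained illustration of the read-merge-write pattern above,
# using a hypothetical temp file instead of the project's configs/ directory:
# keys passed in overwrite existing ones, all other keys are preserved.
import json
from pathlib import Path

path = Path('/tmp/example_config.json')
path.write_text(json.dumps({'seed': 1, 'eval_metric': 'mean_AP'}))
config = json.loads(path.read_text())
config.update({'seed': 42})                    # same merge as the loop above
path.write_text(json.dumps(config, indent=4))  # -> {'seed': 42, 'eval_metric': 'mean_AP'}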
def eval_vae_lr():
    config = get_config()
    # set seed
    SEED = config['seed']
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)

    experiment_dir = config['experiment_dir']
    experiment_path = Path(os.getcwd()) / f'data/vae_model/{experiment_dir}'
    flags_path = experiment_path / 'flags.rar'
    FLAGS = torch.load(flags_path)
    FLAGS.save_figure = True
    FLAGS.dir_cond_gen = Path(__file__).parent.parent / 'data/cond_gen'
    FLAGS.text_gen_lastlayer = 'softmax'
    FLAGS = set_paths(FLAGS, config)
    FLAGS.use_clf = False
    FLAGS.batch_size = 30
    # FLAGS.undersample_dataset = True
    state_dict_path = experiment_path / 'checkpoints/0149/mm_vae'

    mimic_experiment = MimicExperiment(flags=FLAGS)
    mimic_experiment.tb_logger = Dummylogger()
    mimic_experiment.mm_vae.to(FLAGS.device)
    mimic_experiment.mm_vae.load_state_dict(state_dict=torch.load(state_dict_path))
    mimic_experiment.mm_vae.eval()

    results = {}
    for binary_labels in [True]:
        mimic_experiment.flags.binary_labels = binary_labels
        with torch.no_grad():
            clf_lr = train_clf_lr_all_subsets(mimic_experiment, weighted_sampler=False)
            predictions, gt = test_clf_lr_all_subsets(clf_lr, mimic_experiment)
            for subset in predictions:
                # calculate metrics
                metrics = Metrics(predictions[subset], gt,
                                  str_labels=get_labels(FLAGS.binary_labels))
                metrics_dict = metrics.evaluate()
                results[subset] = metrics_dict
                print(subset, ':', metrics_dict[config['eval_metric']][0])

        log.info(f'Lr eval results: {results}')
        out_path = Path(os.getcwd()) / \
            f'data/lr_eval_results{"_bin_label" if binary_labels else ""}.json'
        log.info(f'Saving lr eval test results to {out_path}')
        with open(out_path, 'w') as outfile:
            json.dump(results, outfile)
def init_data():
    """
    If the data folder doesn't exist, download and extract it.
    """
    data_path = Path(os.getcwd()) / 'data'
    if not data_path.exists():
        with tempfile.TemporaryDirectory() as tmpdirname:
            zip_name = 'e7f9b8ef73f5.zip'
            wget_command = f'wget https://ppb.hendrikklug.xyz/{zip_name} -P {tmpdirname}/'
            log.info(f'Executing wget command: {wget_command}')
            os.system(wget_command)
            unzip_command = f'unzip {tmpdirname}/{zip_name} -d {data_path.parent}/'
            log.info(f'Unzipping data folder with: {unzip_command}')
            os.system(unzip_command)
    assert data_path.exists()
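# Side note, not from the original code: subprocess.run with check=True raises on
# a non-zero exit code, so download or extraction failures surface immediately
# instead of being silently swallowed by os.system. A sketch of the same two
# steps (init_data_checked is a hypothetical name, URL and zip name as above):
import subprocess
from pathlib import Path

def init_data_checked(data_path: Path, tmpdirname: str):
    zip_name = 'e7f9b8ef73f5.zip'
    subprocess.run(['wget', f'https://ppb.hendrikklug.xyz/{zip_name}', '-P', f'{tmpdirname}/'],
                   check=True)
    subprocess.run(['unzip', f'{tmpdirname}/{zip_name}', '-d', f'{data_path.parent}/'],
                   check=True)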
def run(self): log.info(f"starting consumer, connecting to {self.producer_host}:{self.producer_port}") context = zmq.Context() with context.socket(PULL) as self.socket: self.socket.setsockopt(RCVTIMEO, self.receive_timeout_ms) self.socket.connect(f"tcp://{self.producer_host}:{self.producer_port}") while self.consuming: try: message = self.socket.recv_json() log.debug("consuming message: %s %s" % (message["text"], message["value"])) except ZMQError as e: if e.errno == ENOTSOCK: log.debug("socket has been closed") elif e.errno == EAGAIN: log.info( f"no message received for %s ms, assuming producer has shutdown" % self.receive_timeout_ms ) break else: log.error(e) log.error(e.errno) log.info("terminating") context.term()
def test_dummy(flags, modality: str = 'PA'):
    """
    Trains and evaluates a dummy classifier on the test set as baseline.
    Returns the average precision values.
    """
    log.info('Starting dummy test.')
    mimic_test = Mimic(flags, LABELS, split='eval')
    dataloader = torch.utils.data.DataLoader(mimic_test,
                                             batch_size=flags.batch_size,
                                             shuffle=True,
                                             num_workers=0,
                                             drop_last=True)
    list_batches = []
    list_labels = []
    list_precision_vals = []
    for idx, (batch_d, batch_l) in enumerate(dataloader):
        ground_truth = batch_l.cpu().data.numpy()
        clf_input = Variable(batch_d[modality]).cpu().data.numpy()
        list_batches.extend(clf_input)
        list_labels.extend(ground_truth)
    # dummy classifier has no partial_fit, so all the data must be fed at once
    dummy_clf = DummyClassifier(strategy="most_frequent")
    dummy_clf.fit(list_batches, list_labels)
    # test dummy clf
    for idx, (batch_d, batch_l) in enumerate(dataloader):
        clf_input = Variable(batch_d[modality]).cpu().data.numpy()
        predictions = dummy_clf.predict(clf_input)
        labels = np.array(np.reshape(batch_l, (batch_l.shape[0], len(LABELS)))).ravel()
        avg_precision = average_precision_score(labels, predictions.ravel())
        if not np.isnan(avg_precision):
            list_precision_vals.append(avg_precision)
        else:
            warnings.warn(
                f'avg_precision_{modality} has value {avg_precision} with labels: {labels.ravel()} and '
                f'prediction: {predictions.ravel()}')
    return list_precision_vals
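# What the "most_frequent" baseline does, in isolation (toy data, not MIMIC): it
# ignores the features and always predicts the majority label seen during fit,
# which is why it serves as a lower bound for the average-precision scores above.
import numpy as np
from sklearn.dummy import DummyClassifier

X = np.zeros((5, 2))            # features are ignored by this strategy
y = np.array([0, 0, 0, 1, 1])   # majority class is 0
baseline = DummyClassifier(strategy="most_frequent").fit(X, y)
print(baseline.predict(np.zeros((3, 2))))  # -> [0 0 0]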
# HK, 11.02.21
import json
import os
from pathlib import Path

import pandas as pd

from mimic.dataio.utils import filter_labels
from logger.logger import log

classes = ['Finding']
train_df = pd.read_csv(Path(os.getcwd()) / 'data/train_labels.csv')
df = filter_labels(labels=train_df,
                   undersample_dataset=False,
                   split='train',
                   which_labels=classes)
stats = {
    'Finding': int(df[df[classes] == 1].count().Finding),
    'NoFinding': int(df[df[classes] == 0].count().Finding)
}
out_path = Path(os.getcwd()) / 'data/dataset_stats.json'
log.info(f'Saving dataset stats to {out_path}')
with open(out_path, 'w') as outfile:
    json.dump(stats, outfile)
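# Why the .count() idiom above works (toy illustration): df[df[['Finding']] == 1]
# keeps the frame's shape but masks non-matching cells with NaN, and .count()
# then counts the remaining non-NaN cells per column.
import pandas as pd

toy = pd.DataFrame({'Finding': [1, 0, 1, 1]})
masked = toy[toy[['Finding']] == 1]  # non-matching rows become NaN
print(int(masked.count().Finding))   # -> 3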
def test_vae_gen():
    config = get_config()
    # set seed
    SEED = config['seed']
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)

    # experiment_dir = config['experiment_dir']
    experiment_dir = 'binary_labels-True_beta-0.01_weighted_sampler-False_class_dim-128_text_gen_lastlayer-softmax_2021_02_10_14_56_27_974859'
    experiment_path = Path(os.getcwd()) / f'data/vae_model/{experiment_dir}'
    flags_path = experiment_path / 'flags.rar'
    FLAGS = torch.load(flags_path)
    FLAGS.save_figure = False
    FLAGS.dir_cond_gen = Path(__file__).parent.parent / 'data/cond_gen'
    FLAGS.text_gen_lastlayer = 'softmax'
    FLAGS = set_paths(FLAGS)
    FLAGS.dir_clf = Path(os.getcwd()) / 'data/clfs/trained_classifiers_final'
    FLAGS.dir_gen_eval_fid = Path(os.getcwd()) / 'data/gen_eval_fid'
    FLAGS.use_clf = True
    FLAGS.batch_size = 30
    state_dict_path = experiment_path / 'checkpoints/0149/mm_vae'
    FLAGS.binary_labels = True

    mimic_experiment = MimicExperiment(flags=FLAGS)
    mimic_experiment.tb_logger = Dummylogger()
    mimic_experiment.mm_vae.to(FLAGS.device)
    mimic_experiment.mm_vae.load_state_dict(state_dict=torch.load(state_dict_path))
    mimic_experiment.mm_vae.eval()

    test_set = Mimic(FLAGS, mimic_experiment.labels, split='test')
    d_loader = DataLoader(test_set,
                          batch_size=FLAGS.batch_size,
                          shuffle=False,
                          num_workers=FLAGS.dataloader_workers,
                          drop_last=False)
    mm_vae = mimic_experiment.mm_vae
    mods = mimic_experiment.modalities
    subsets = mimic_experiment.subsets
    if '' in subsets:
        del subsets['']

    with torch.no_grad():
        batch_labels, gen_perf, cond_gen_classified = classify_generated_samples(
            FLAGS, d_loader, mimic_experiment, mm_vae, mods, subsets)

        gen_perf_cond = {}
        # compare the classification on the generated samples with the ground truth
        for l_idx, l_key in enumerate(mimic_experiment.labels):
            gen_perf_cond[l_key] = {}
            for s_key in subsets:
                gen_perf_cond[l_key][s_key] = {}
                for m_key in mods:
                    metrics = Metrics(cond_gen_classified[s_key][m_key],
                                      batch_labels,
                                      str_labels=get_labels(FLAGS.binary_labels))
                    gen_perf_cond[l_key][s_key][m_key] = \
                        metrics.evaluate()[config['eval_metric']][0]
            eval_score = mimic_experiment.mean_eval_metric(gen_perf['random'][l_key])
            gen_perf['random'][l_key] = eval_score
        gen_perf['cond'] = gen_perf_cond

    results = gen_perf
    log.info(f'Gen eval results: {results}')
    out_path = Path(os.getcwd()) / 'data/gen_eval_results.json'
    log.info(f'Saving gen eval test results to {out_path}')
    with open(out_path, 'w') as outfile:
        json.dump(results, outfile)
def make_cond_gen_fig(nbr_samples=3):
    import mimic
    from mimic.utils import utils
    from mimic.utils.experiment import MimicExperiment
    from mimic.utils.filehandling import set_paths
    log.info(f'Starting generating cond gen fig with nbr_samples={nbr_samples}')
    config = get_config()
    # set seed
    SEED = config['seed']
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)

    # experiment_dir = config['experiment_dir_bin']
    experiment_dir = 'binary_labels-True_beta-0.01_weighted_sampler-False_class_dim-128_text_gen_lastlayer-softmax_2021_02_10_14_56_27_974859'
    experiment_path = Path(__file__).parent.parent / f'data/vae_model/{experiment_dir}'
    flags_path = experiment_path / 'flags.rar'
    FLAGS = torch.load(flags_path)
    FLAGS.save_figure = True
    FLAGS.dir_cond_gen = Path(__file__).parent.parent / 'data/cond_gen'
    # FLAGS.text_gen_lastlayer = 'softmax'
    FLAGS = set_paths(FLAGS)
    FLAGS.use_clf = False
    FLAGS.binary_labels = False
    state_dict_path = experiment_path / 'checkpoints/0149/mm_vae'

    mimic_experiment = MimicExperiment(flags=FLAGS)
    mimic_experiment.mm_vae.to(FLAGS.device)
    mimic_experiment.mm_vae.load_state_dict(state_dict=torch.load(state_dict_path))
    mimic_experiment.mm_vae.eval()
    mimic_experiment.modalities['text'].plot_img_size = torch.Size([1, 256, 128])

    samples = mimic_experiment.test_samples
    model = mimic_experiment.mm_vae
    mods = mimic_experiment.modalities
    subsets = mimic_experiment.subsets

    if not Path(mimic_experiment.flags.dir_cond_gen).exists():
        Path(mimic_experiment.flags.dir_cond_gen).mkdir()

    def create_cond_gen_plot(in_mods='Lateral_PA'):
        subset = subsets[in_mods]
        plot = {
            **{f'in_{mod}': [] for mod in mimic_experiment.modalities},
            **{f'out_{mod}': [] for mod in mimic_experiment.modalities}
        }
        for idx in range(nbr_samples):
            sample = samples[idx]
            i_batch = {mod.name: sample[mod.name].unsqueeze(0) for mod in subset}
            latents = model.inference(i_batch, num_samples=1)
            c_in = latents['subsets'][in_mods]
            c_rep = utils.reparameterize(mu=c_in[0], logvar=c_in[1])
            cond_mod_in = {
                'content': c_rep,
                'style': {k: None for k in mimic_experiment.modalities}
            }
            cond_gen_samples = model.generate_from_latents(cond_mod_in)
            for mod_key, mod in mods.items():
                plot[f'in_{mod_key}'].append(
                    mod.plot_data(mimic_experiment, sample[mod_key].squeeze(0)))
                plot[f'out_{mod_key}'].append(
                    mod.plot_data(mimic_experiment, cond_gen_samples[mod_key].squeeze(0)))

        rec = torch.Tensor()
        # first concatenate all input images, then all the output images
        for which, modalities in {'in': mods, 'out': mods}.items():
            for mod in modalities:
                for idx in range(nbr_samples):
                    if mod == 'text':
                        img = plot[f'{which}_{mod}'][idx].cpu().unsqueeze(0)
                    else:
                        img = plot[f'{which}_{mod}'][idx].cpu()
                        # pad the non-text modalities such that they fit in a wider rectangle
                        m = nn.ZeroPad2d((64, 64, 0, 0))
                        img = m(img.squeeze()).unsqueeze(0).unsqueeze(0)
                    rec = torch.cat((rec, img), 0)
        out_path = Path(mimic_experiment.flags.dir_cond_gen) / \
            f'{in_mods}{"_small" if nbr_samples < 5 else ""}.png'
        log.info(f'Saving image to {out_path}')
        _ = mimic.utils.plot.create_fig(out_path,
                                        img_data=rec,
                                        num_img_row=nbr_samples,
                                        save_figure=True)

    for in_mod in mimic_experiment.subsets:
        if in_mod:
            # for in_mod in ['Lateral_text']:
            create_cond_gen_plot(in_mod)
mimic_config_path = Path(os.getcwd()) / f'prepare/mimic_configs/{get_config_path()}.json'
FLAGS = update_flags_with_config(mimic_config_path)
out_path = f'{FLAGS.dir_clf}/clf_test_results.json'
with open(out_path, 'r') as outfile:
    results = json.load(outfile)
log.info(f'Saving dummy classifier test results to {out_path}')
results = {**results, 'rand_perf': np.mean(test_dummy(FLAGS, modality='PA'))}
with open(out_path, 'w') as outfile:
    json.dump(results, outfile)
def test_clf_lr_all_subsets(clf_lr, exp):
    """
    Test the classifiers that were trained on latent representations.
    """
    args = exp.flags
    mm_vae = exp.mm_vae
    mm_vae.eval()
    subsets = exp.subsets
    if '' in subsets:
        del subsets['']

    test_set = Mimic(args, exp.labels, split='test')
    d_loader = DataLoader(test_set,
                          batch_size=exp.flags.batch_size,
                          shuffle=False,
                          num_workers=0,
                          drop_last=False)
    if exp.flags.steps_per_training_epoch > 0:
        training_steps = exp.flags.steps_per_training_epoch
    else:
        training_steps = len(d_loader)
    log.info(f'Creating {training_steps} batches of latent representations for classifier testing '
             f'with a batch_size of {exp.flags.batch_size}.')

    clf_predictions = {subset: torch.Tensor() for subset in subsets}
    batch_labels = torch.Tensor()
    for iteration, (batch_d, batch_l) in enumerate(d_loader):
        if iteration > training_steps:
            break
        batch_labels = torch.cat((batch_labels, batch_l), 0)
        batch_d = dict_to_device(batch_d, exp.flags.device)
        inferred = mm_vae.module.inference(batch_d) if args.distributed else mm_vae.inference(batch_d)
        lr_subsets = inferred['subsets']
        data_test = {key: lr_subsets[key][0].cpu().data.numpy() for key in lr_subsets}
        clf_predictions_batch = classify_latent_representations(exp, clf_lr, data_test)
        clf_predictions_batch: Mapping[str, Mapping[str, np.array]]
        for subset in subsets:
            clf_predictions_batch_subset = torch.cat(
                tuple(torch.tensor(clf_predictions_batch[label][subset]).unsqueeze(1)
                      for label in get_labels(args.binary_labels)), 1)
            clf_predictions[subset] = torch.cat(
                [clf_predictions[subset], clf_predictions_batch_subset], 0)
    return clf_predictions, batch_labels
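# The per-batch accumulation above boils down to growing tensors with torch.cat,
# starting from an empty torch.Tensor(). A tiny runnable illustration of that
# pattern with toy shapes (2 batches of size 8, 4 labels):
import torch

acc = torch.Tensor()                       # starts empty, like clf_predictions[subset]
for _ in range(2):                         # two "batches"
    per_label = tuple(torch.rand(8).unsqueeze(1) for _ in range(4))
    batch_block = torch.cat(per_label, 1)  # shape (8, 4): batch_size x num_labels
    acc = torch.cat([acc, batch_block], 0)
print(acc.shape)                           # torch.Size([16, 4])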
if __name__ == '__main__':
    log.info('Starting classifier testing.')
    config = get_config()
    mimic_config_path = Path(os.getcwd()) / f'prepare/mimic_configs/{get_config_str()}.json'
    FLAGS = update_flags_with_config(mimic_config_path)
    FLAGS.dir_clf = Path(os.getcwd()) / f'data/clfs/{config["dir_clf"]}'
    FLAGS.reduce_lr_on_plateau = True
    FLAGS.fixed_extractor = True
    FLAGS.normalization = False
    FLAGS = expand_paths(FLAGS)
    use_cuda = torch.cuda.is_available()
    FLAGS.device = torch.device('cuda' if use_cuda else 'cpu')
    FLAGS.binary_labels = True
    FLAGS.img_clf_type = 'resnet'
    results = test_clfs(FLAGS, 128, 'word')
# run with /home/hendrik/miniconda3/envs/mimic/bin/python prepare/run_prepare_loop.py
parent_dir = Path('/mnt/data/hendrik/mimic_scratch/mimic/moe/test_beta_bigsearch')
config_path = Path(os.getcwd()) / 'configs/bartholin.json'
experiment_df = pd.read_csv(Path(os.getcwd()) / 'data/experiments_dataframe.csv')

for experiment_dir in parent_dir.iterdir():
    experiment_uid = experiment_dir.name
    if experiment_uid in experiment_df['experiment_uid'].tolist():
        dest_dir = Path(os.getcwd()) / f'data/vae_model/{experiment_uid}'
        if not dest_dir.exists():
            symlink_command = f'ln -s {experiment_dir} {dest_dir}'
            log.info(f'Running {symlink_command}')
            os.system(symlink_command)
        with open(config_path, 'r') as json_file:
            config = json.load(json_file)
        config['experiment_dir'] = experiment_uid
        config['experiment_dir_bin'] = experiment_uid
        with open(config_path, 'w') as json_file:
            json.dump(config, json_file)
        make_cond_gen_fig()
        test_vae_gen()
        os.system('./prepare/run_loop.sh')
    else:
        print(f'{experiment_uid} is not found in experiment_df')