Example #1
def load_clfs(args) -> dict:
    clfs = {}
    for modality in MODALITIES:
        log.info(f'Loading {modality} clf.')
        if modality in ['PA', 'Lateral']:
            clf = ClfImg(args, get_labels(args.binary_labels)) if args.img_clf_type == 'resnet' else CheXNet(
                len(get_labels(args.binary_labels)))

            dir_clf = f'{args.dir_clf}/Mimic{args.img_size}_{args.img_clf_type}{"_bin_label" if args.binary_labels else ""}'
            clf_path = Path(glob.glob(f"{dir_clf}/clf_{MOD_MAPPING[modality]}*")[0])

        elif modality == 'text':
            dir_clf = args.dir_clf
            clf = ClfText(args, get_labels(args.binary_labels))
            if args.binary_labels:
                clf_path = \
                    [f for f in glob.glob(f'{dir_clf}/clf_{MOD_MAPPING[modality]}vocabsize_{args.vocab_size}*') if
                     'bin_label' in f][0]
            else:
                clf_path = \
                    [f for f in glob.glob(f'{dir_clf}/clf_{MOD_MAPPING[modality]}vocabsize_{args.vocab_size}*') if
                     'bin_label' not in f][0]

        else:
            raise NotImplementedError
        log.info(f'Loading state dict from {clf_path}.')
        clf.load_state_dict(torch.load(clf_path))
        clfs[modality] = clf.to(args.device)

    return clfs
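Note that torch.load above restores tensors onto the device the checkpoint was saved on; on a CPU-only machine the same call usually needs map_location. A minimal sketch of that variant (the helper name is hypothetical, clf/clf_path/device as above):

def load_state_dict_cpu_safe(clf, clf_path, device):
    # Load the checkpoint onto the target device explicitly, so a state dict
    # saved on a GPU can also be restored on a CPU-only machine.
    state_dict = torch.load(clf_path, map_location=device)
    clf.load_state_dict(state_dict)
    return clf.to(device)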
Example #2
    def run(self):
        log.info(f"starting producer, listening on port {self.port}")

        context = zmq.Context()

        with context.socket(PUSH) as self.socket:
            self.socket.bind(f"tcp://*:{self.port}")

            while self.producing:
                message = {
                    "text": "hello world",
                    "value": random(),
                }
                log.debug("producing message %s" % message)
                try:
                    self.socket.send_json(message)
                    sleep(1)
                except ZMQError as e:
                    if e.errno == zmq.ENOTSOCK:
                        log.debug("socket has been closed, terminating")
                    else:
                        log.error(e)

        log.info("terminating")
        context.term()
Example #3
def write_to_config(values: dict):
    config = get_config()
    for k, v in values.items():
        config[k] = v
    config_path = Path(os.getcwd()) / f'configs/{get_config_path()}.json'
    log.info(f'Writing {values} to config {config_path}.')
    with open(config_path, 'w') as json_file:
        json.dump(config, json_file, indent=4)
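A possible call site, assuming get_config() and get_config_path() refer to the JSON config under configs/ as above (the values below are only illustrative):

if __name__ == '__main__':
    # Persist a couple of run parameters into the active config file.
    write_to_config({'seed': 42, 'experiment_dir': 'my_experiment'})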
Example #4
    def create_cond_gen_plot(in_mods='Lateral_PA'):
        subset = subsets[in_mods]
        plot = {
            **{f'in_{mod}': []
               for mod in mimic_experiment.modalities},
            **{f'out_{mod}': []
               for mod in mimic_experiment.modalities}
        }

        for idx in range(nbr_samples):
            sample = samples[idx]

            i_batch = {
                mod.name: sample[mod.name].unsqueeze(0)
                for mod in subset
            }
            latents = model.inference(i_batch, num_samples=1)
            c_in = latents['subsets'][in_mods]
            c_rep = utils.reparameterize(mu=c_in[0], logvar=c_in[1])
            cond_mod_in = {
                'content': c_rep,
                'style': {k: None
                          for k in mimic_experiment.modalities}
            }
            cond_gen_samples = model.generate_from_latents(cond_mod_in)
            for mod_key, mod in mods.items():
                plot[f'in_{mod_key}'].append(
                    mod.plot_data(mimic_experiment,
                                  sample[mod_key].squeeze(0)))
                plot[f'out_{mod_key}'].append(
                    mod.plot_data(mimic_experiment,
                                  cond_gen_samples[mod_key].squeeze(0)))

        rec = torch.Tensor()

        # first concatenate all input images, then all the output images
        for which, modalities in {'in': mods, 'out': mods}.items():
            for mod in modalities:
                for idx in range(nbr_samples):
                    if mod == 'text':
                        img = plot[f'{which}_{mod}'][idx].cpu().unsqueeze(0)
                    else:
                        img = plot[f'{which}_{mod}'][idx].cpu()
                        # pad the non-text modalities such that they fit in a wider rectangle.
                        m = nn.ZeroPad2d((64, 64, 0, 0))
                        img = m(img.squeeze()).unsqueeze(0).unsqueeze(0)
                    rec = torch.cat((rec, img), 0)

        out_path = Path(
            mimic_experiment.flags.dir_cond_gen
        ) / f'{in_mods}{"_small" if nbr_samples < 5 else ""}.png'
        log.info(f'Saving image to {out_path}')

        _ = mimic.utils.plot.create_fig(out_path,
                                        img_data=rec,
                                        num_img_row=nbr_samples,
                                        save_figure=True)
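For context, utils.reparameterize used above presumably implements the standard VAE reparameterization trick; a minimal self-contained sketch (not necessarily the project's exact implementation):

import torch

def reparameterize(mu: torch.Tensor, logvar: torch.Tensor) -> torch.Tensor:
    # Sample z = mu + sigma * eps with eps ~ N(0, I), keeping the sample
    # differentiable with respect to mu and logvar.
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + eps * std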
Example #5
def eval_vae_lr():
    config = get_config()

    # set seed
    SEED = config['seed']
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)

    experiment_dir = config['experiment_dir']
    experiment_path = Path(os.getcwd()) / f'data/vae_model/{experiment_dir}'
    flags_path = experiment_path / 'flags.rar'
    FLAGS = torch.load(flags_path)
    FLAGS.save_figure = True
    FLAGS.dir_cond_gen = Path(__file__).parent.parent / 'data/cond_gen'
    FLAGS.text_gen_lastlayer = 'softmax'

    FLAGS = set_paths(FLAGS, config)
    FLAGS.use_clf = False
    FLAGS.batch_size = 30
    # FLAGS.undersample_dataset = True
    state_dict_path = experiment_path / 'checkpoints/0149/mm_vae'

    mimic_experiment = MimicExperiment(flags=FLAGS)
    mimic_experiment.tb_logger = Dummylogger()
    mimic_experiment.mm_vae.to(FLAGS.device)
    mimic_experiment.mm_vae.load_state_dict(
        state_dict=torch.load(state_dict_path))
    mimic_experiment.mm_vae.eval()

    results = {}
    for binary_labels in [True]:
        mimic_experiment.flags.binary_labels = binary_labels
        with torch.no_grad():
            clf_lr = train_clf_lr_all_subsets(mimic_experiment,
                                              weighted_sampler=False)
            predictions, gt = test_clf_lr_all_subsets(clf_lr, mimic_experiment)
            for subset in predictions:
                # calculate metrics
                metrics = Metrics(predictions[subset],
                                  gt,
                                  str_labels=get_labels(FLAGS.binary_labels))
                metrics_dict = metrics.evaluate()
                results[subset] = metrics_dict
                print(subset, ':', metrics_dict[config['eval_metric']][0])

    log.info(f'Lr eval results: {results}')

    out_path = Path(
        os.getcwd()) / f'data/lr_eval_results{"_bin_label" if binary_labels else ""}.json'
    log.info(f'Saving lr eval test results to {out_path}')

    with open(out_path, 'w') as outfile:
        json.dump(results, outfile)
Example #6
def init_data():
    """
    If the data folder doesn't exist, download and extract it.
    """
    data_path = Path(os.getcwd()) / 'data'
    if not data_path.exists():
        with tempfile.TemporaryDirectory() as tmpdirname:
            zip_name = 'e7f9b8ef73f5.zip'
            wget_command = f'wget https://ppb.hendrikklug.xyz/{zip_name} -P {tmpdirname}/'
            log.info(f'Executing wget command: {wget_command}')
            os.system(wget_command)
            unzip_command = f'unzip {tmpdirname}/{zip_name} -d {data_path.parent}/'
            log.info(f'Unzipping data folder with: {unzip_command}')
            os.system(unzip_command)
    assert data_path.exists()
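The wget/unzip shell calls could also be done with the standard library; a minimal sketch under the same assumptions about the download URL (the helper name is hypothetical):

import tempfile
import urllib.request
import zipfile
from pathlib import Path

def download_and_extract(url: str, data_path: Path) -> None:
    # Download the archive to a temporary file and extract it next to
    # data_path, mirroring the wget/unzip calls above.
    with tempfile.NamedTemporaryFile(suffix='.zip') as tmp:
        urllib.request.urlretrieve(url, tmp.name)
        with zipfile.ZipFile(tmp.name) as archive:
            archive.extractall(data_path.parent)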
Example #7
    def run(self):
        log.info(f"starting consumer, connecting to {self.producer_host}:{self.producer_port}")

        context = zmq.Context()

        with context.socket(PULL) as self.socket:
            self.socket.setsockopt(RCVTIMEO, self.receive_timeout_ms)
            self.socket.connect(f"tcp://{self.producer_host}:{self.producer_port}")

            while self.consuming:
                try:
                    message = self.socket.recv_json()
                    log.debug("consuming message: %s %s" % (message["text"], message["value"]))
                except ZMQError as e:
                    if e.errno == ENOTSOCK:
                        log.debug("socket has been closed")
                    elif e.errno == EAGAIN:
                        log.info(
                            "no message received for %s ms, assuming producer has shutdown" % self.receive_timeout_ms
                        )
                        break
                    else:
                        log.error(e)
                        log.error(e.errno)

        log.info("terminating")
        context.term()
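Only the run() methods of the producer and consumer are shown in Examples #2 and #7; if the two classes are threading.Thread subclasses that take the attributes used above as constructor arguments (both are assumptions), they could be wired up roughly like this:

from time import sleep

# Hypothetical wiring; the constructor signatures and the Thread base class are
# assumptions, only the run() methods are shown above.
producer = Producer(port=5555)
consumer = Consumer(producer_host="localhost", producer_port=5555,
                    receive_timeout_ms=2000)
producer.start()
consumer.start()
sleep(10)                    # let some messages flow
producer.producing = False   # the producer loop exits after the next send
producer.join()
consumer.join()              # the consumer stops once its receive times out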
Example #8
def test_dummy(flags, modality: str = 'PA'):
    """
    Trains and evaluates a dummy classifier on the test set as a baseline.
    Returns the list of average precision values.
    """
    log.info('Starting dummy test.')
    mimic_test = Mimic(flags, LABELS, split='eval')
    dataloader = torch.utils.data.DataLoader(mimic_test,
                                             batch_size=flags.batch_size,
                                             shuffle=True,
                                             num_workers=0,
                                             drop_last=True)
    list_batches = []
    list_labels = []
    list_precision_vals = []

    for idx, (batch_d, batch_l) in enumerate(dataloader):
        ground_truth = batch_l.cpu().data.numpy()
        clf_input = Variable(batch_d[modality]).cpu().data.numpy()
        list_batches.extend(clf_input)
        list_labels.extend(ground_truth)
    # dummy classifier has no partial_fit, so all the data must be fed at once
    dummy_clf = DummyClassifier(strategy="most_frequent")
    dummy_clf.fit(list_batches, list_labels)
    # test dummy clf
    for idx, (batch_d, batch_l) in enumerate(dataloader):
        clf_input = Variable(batch_d[modality]).cpu().data.numpy()
        predictions = dummy_clf.predict(clf_input)
        labels = np.array(np.reshape(batch_l,
                                     (batch_l.shape[0], len(LABELS)))).ravel()
        avg_precision = average_precision_score(labels, predictions.ravel())
        if not np.isnan(avg_precision):
            list_precision_vals.append(avg_precision)
        else:
            warnings.warn(
                f'avg_precision_{modality} has value {avg_precision} with labels: {labels.ravel()} and '
                f'prediction: {predictions.ravel()}')
    return list_precision_vals
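As a self-contained illustration of this baseline: DummyClassifier with strategy='most_frequent' always predicts the majority class, so average_precision_score collapses to the positive-label prevalence (toy data, unrelated to MIMIC):

import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.metrics import average_precision_score

X = np.random.rand(8, 4)                # toy features
y = np.array([0, 0, 0, 0, 0, 1, 1, 0])  # imbalanced binary labels
dummy = DummyClassifier(strategy="most_frequent").fit(X, y)
print(average_precision_score(y, dummy.predict(X)))  # equals the prevalence, 0.25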
Example #9
# HK, 11.02.21
import json
import os
from pathlib import Path

import pandas as pd
from mimic.dataio.utils import filter_labels

from logger.logger import log

classes = ['Finding']
train_df = pd.read_csv(Path(os.getcwd()) / 'data/train_labels.csv')
df = filter_labels(labels=train_df,
                   undersample_dataset=False,
                   split='train',
                   which_labels=classes)

stats = {
    'Finding': int(df[df[classes] == 1].count().Finding),
    'NoFinding': int(df[df[classes] == 0].count().Finding)
}

out_path = Path(os.getcwd()) / 'data/dataset_stats.json'
log.info(f'Saving dataset stats to {out_path}')

with open(out_path, 'w') as outfile:
    json.dump(stats, outfile)
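The same statistics could be computed directly with value_counts, assuming the Finding column holds 0/1 integers; a small equivalent sketch on the filtered dataframe (df as above):

# Equivalent counts of positive and negative 'Finding' rows.
counts = df['Finding'].value_counts()
stats = {
    'Finding': int(counts.get(1, 0)),
    'NoFinding': int(counts.get(0, 0)),
}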
Example #10
def test_vae_gen():
    config = get_config()
    # set seed
    SEED = config['seed']
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)

    # experiment_dir = config['experiment_dir']
    experiment_dir = 'binary_labels-True_beta-0.01_weighted_sampler-False_class_dim-128_text_gen_lastlayer-softmax_2021_02_10_14_56_27_974859'

    experiment_path = Path(os.getcwd()) / f'data/vae_model/{experiment_dir}'
    flags_path = experiment_path / 'flags.rar'
    FLAGS = torch.load(flags_path)
    FLAGS.save_figure = False
    FLAGS.dir_cond_gen = Path(__file__).parent.parent / 'data/cond_gen'
    FLAGS.text_gen_lastlayer = 'softmax'

    FLAGS = set_paths(FLAGS)
    FLAGS.dir_clf = Path(os.getcwd()) / 'data/clfs/trained_classifiers_final'
    FLAGS.dir_gen_eval_fid = Path(os.getcwd()) / 'data/gen_eval_fid'
    FLAGS.use_clf = True
    FLAGS.batch_size = 30
    state_dict_path = experiment_path / 'checkpoints/0149/mm_vae'
    FLAGS.binary_labels = True
    mimic_experiment = MimicExperiment(flags=FLAGS)
    mimic_experiment.tb_logger = Dummylogger()
    mimic_experiment.mm_vae.to(FLAGS.device)
    mimic_experiment.mm_vae.load_state_dict(
        state_dict=torch.load(state_dict_path))
    mimic_experiment.mm_vae.eval()
    test_set = Mimic(FLAGS, mimic_experiment.labels, split='test')

    d_loader = DataLoader(test_set,
                          batch_size=FLAGS.batch_size,
                          shuffle=False,
                          num_workers=FLAGS.dataloader_workers,
                          drop_last=False)
    mm_vae = mimic_experiment.mm_vae
    mods = mimic_experiment.modalities
    subsets = mimic_experiment.subsets
    if '' in subsets:
        del subsets['']

    with torch.no_grad():
        batch_labels, gen_perf, cond_gen_classified = classify_generated_samples(
            FLAGS, d_loader, mimic_experiment, mm_vae, mods, subsets)

        gen_perf_cond = {}
        # compare the classification on the generated samples with the ground truth
        for l_idx, l_key in enumerate(mimic_experiment.labels):
            gen_perf_cond[l_key] = {}
            for s_key in subsets:
                gen_perf_cond[l_key][s_key] = {}
                for m_key in mods:
                    metrics = Metrics(cond_gen_classified[s_key][m_key],
                                      batch_labels,
                                      str_labels=get_labels(
                                          FLAGS.binary_labels))
                    gen_perf_cond[l_key][s_key][m_key] = metrics.evaluate()[
                        config['eval_metric']][0]

            eval_score = mimic_experiment.mean_eval_metric(
                gen_perf['random'][l_key])
            gen_perf['random'][l_key] = eval_score

        gen_perf['cond'] = gen_perf_cond

    results = gen_perf

    log.info(f'Gen eval results: {results}')

    out_path = Path(os.getcwd()) / 'data/gen_eval_results.json'
    log.info(f'Saving gen eval test results to {out_path}')
    with open(out_path, 'w') as outfile:
        json.dump(results, outfile)
Example #11
def make_cond_gen_fig(nbr_samples=3):
    import mimic
    from mimic.utils import utils
    from mimic.utils.experiment import MimicExperiment
    from mimic.utils.filehandling import set_paths
    log.info(
        f'Starting generating cond gen fig with nbr_samples={nbr_samples}')
    config = get_config()

    # set seed
    SEED = config['seed']
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    random.seed(SEED)

    # experiment_dir = config['experiment_dir_bin']
    experiment_dir = 'binary_labels-True_beta-0.01_weighted_sampler-False_class_dim-128_text_gen_lastlayer-softmax_2021_02_10_14_56_27_974859'
    experiment_path = Path(
        __file__).parent.parent / f'data/vae_model/{experiment_dir}'
    flags_path = experiment_path / 'flags.rar'
    FLAGS = torch.load(flags_path)
    FLAGS.save_figure = True
    FLAGS.dir_cond_gen = Path(__file__).parent.parent / 'data/cond_gen'
    # FLAGS.text_gen_lastlayer = 'softmax'

    FLAGS = set_paths(FLAGS)
    FLAGS.use_clf = False
    FLAGS.binary_labels = False
    state_dict_path = experiment_path / 'checkpoints/0149/mm_vae'

    mimic_experiment = MimicExperiment(flags=FLAGS)
    mimic_experiment.mm_vae.to(FLAGS.device)
    mimic_experiment.mm_vae.load_state_dict(
        state_dict=torch.load(state_dict_path))
    mimic_experiment.mm_vae.eval()

    mimic_experiment.modalities['text'].plot_img_size = torch.Size(
        [1, 256, 128])

    samples = mimic_experiment.test_samples
    model = mimic_experiment.mm_vae
    mods = mimic_experiment.modalities
    subsets = mimic_experiment.subsets

    if not Path(mimic_experiment.flags.dir_cond_gen).exists():
        Path(mimic_experiment.flags.dir_cond_gen).mkdir()

    def create_cond_gen_plot(in_mods='Lateral_PA'):
        subset = subsets[in_mods]
        plot = {
            **{f'in_{mod}': []
               for mod in mimic_experiment.modalities},
            **{f'out_{mod}': []
               for mod in mimic_experiment.modalities}
        }

        for idx in range(nbr_samples):
            sample = samples[idx]

            i_batch = {
                mod.name: sample[mod.name].unsqueeze(0)
                for mod in subset
            }
            latents = model.inference(i_batch, num_samples=1)
            c_in = latents['subsets'][in_mods]
            c_rep = utils.reparameterize(mu=c_in[0], logvar=c_in[1])
            cond_mod_in = {
                'content': c_rep,
                'style': {k: None
                          for k in mimic_experiment.modalities}
            }
            cond_gen_samples = model.generate_from_latents(cond_mod_in)
            for mod_key, mod in mods.items():
                plot[f'in_{mod_key}'].append(
                    mod.plot_data(mimic_experiment,
                                  sample[mod_key].squeeze(0)))
                plot[f'out_{mod_key}'].append(
                    mod.plot_data(mimic_experiment,
                                  cond_gen_samples[mod_key].squeeze(0)))

        rec = torch.Tensor()

        # first concatenate all input images, then all the output images
        for which, modalities in {'in': mods, 'out': mods}.items():
            for mod in modalities:
                for idx in range(nbr_samples):
                    if mod == 'text':
                        img = plot[f'{which}_{mod}'][idx].cpu().unsqueeze(0)
                    else:
                        img = plot[f'{which}_{mod}'][idx].cpu()
                        # pad the non-text modalities such that they fit in a wider rectangle.
                        m = nn.ZeroPad2d((64, 64, 0, 0))
                        img = m(img.squeeze()).unsqueeze(0).unsqueeze(0)
                    rec = torch.cat((rec, img), 0)

        out_path = Path(
            mimic_experiment.flags.dir_cond_gen
        ) / f'{in_mods}{"_small" if nbr_samples < 5 else ""}.png'
        log.info(f'Saving image to {out_path}')

        _ = mimic.utils.plot.create_fig(out_path,
                                        img_data=rec,
                                        num_img_row=nbr_samples,
                                        save_figure=True)

    for in_mod in mimic_experiment.subsets:
        if in_mod:
            # for in_mod in ['Lateral_text']:
            create_cond_gen_plot(in_mod)
Example #12
    dummy_clf.fit(list_batches, list_labels)
    # test dummy clf
    for idx, (batch_d, batch_l) in enumerate(dataloader):
        clf_input = Variable(batch_d[modality]).cpu().data.numpy()
        predictions = dummy_clf.predict(clf_input)
        labels = np.array(np.reshape(batch_l,
                                     (batch_l.shape[0], len(LABELS)))).ravel()
        avg_precision = average_precision_score(labels, predictions.ravel())
        if not np.isnan(avg_precision):
            list_precision_vals.append(avg_precision)
        else:
            warnings.warn(
                f'avg_precision_{modality} has value {avg_precision} with labels: {labels.ravel()} and '
                f'prediction: {predictions.ravel()}')
    return list_precision_vals


mimic_config_path = Path(
    os.getcwd()) / f'prepare/mimic_configs/{get_config_path()}.json'
FLAGS = update_flags_with_config(mimic_config_path)
out_path = f'{FLAGS.dir_clf}/clf_test_results.json'

with open(out_path, 'r') as outfile:
    results = json.load(outfile)

log.info(f'Saving dummy classifier test results to {out_path}')
results = {**results, 'rand_perf': np.mean(test_dummy(FLAGS, modality='PA'))}

with open(out_path, 'w') as outfile:
    json.dump(results, outfile)
Example #13
def test_clf_lr_all_subsets(clf_lr, exp):
    """
    Test the classifiers that were trained on latent representations.
    """
    args = exp.flags
    mm_vae = exp.mm_vae
    mm_vae.eval()
    subsets = exp.subsets
    if '' in subsets:
        del subsets['']

    test_set = Mimic(args, exp.labels, split='test')

    d_loader = DataLoader(test_set,
                          batch_size=exp.flags.batch_size,
                          shuffle=False,
                          num_workers=0,
                          drop_last=False)

    if exp.flags.steps_per_training_epoch > 0:
        training_steps = exp.flags.steps_per_training_epoch
    else:
        training_steps = len(d_loader)
    log.info(
        f'Creating {training_steps} batches of latent representations for classifier testing '
        f'with a batch_size of {exp.flags.batch_size}.')

    clf_predictions = {subset: torch.Tensor() for subset in subsets}

    batch_labels = torch.Tensor()

    for iteration, (batch_d, batch_l) in enumerate(d_loader):
        if iteration > training_steps:
            break
        batch_labels = torch.cat((batch_labels, batch_l), 0)

        batch_d = dict_to_device(batch_d, exp.flags.device)

        inferred = mm_vae.module.inference(
            batch_d) if args.distributed else mm_vae.inference(batch_d)
        lr_subsets = inferred['subsets']
        data_test = {
            key: lr_subsets[key][0].cpu().data.numpy()
            for key in lr_subsets
        }

        clf_predictions_batch = classify_latent_representations(
            exp, clf_lr, data_test)
        clf_predictions_batch: Mapping[str, Mapping[str, np.array]]

        for subset in subsets:
            clf_predictions_batch_subset = torch.cat(
                tuple(
                    torch.tensor(clf_predictions_batch[label]
                                 [subset]).unsqueeze(1)
                    for label in get_labels(args.binary_labels)), 1)

            clf_predictions[subset] = torch.cat(
                [clf_predictions[subset], clf_predictions_batch_subset], 0)

    return clf_predictions, batch_labels
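train_clf_lr_all_subsets and classify_latent_representations are project helpers; they presumably fit one logistic-regression classifier per label and subset on the latent means. A minimal stand-alone sketch of that idea (toy data, not the project's API):

import numpy as np
from sklearn.linear_model import LogisticRegression

latents = np.random.randn(100, 128)          # toy latent representations
labels = np.random.randint(0, 2, size=100)   # toy binary label
clf_lr = LogisticRegression(max_iter=1000).fit(latents, labels)
predictions = clf_lr.predict_proba(latents)[:, 1]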
Example #14
            else:
                clf_path = \
                    [f for f in glob.glob(f'{dir_clf}/clf_{MOD_MAPPING[modality]}vocabsize_{args.vocab_size}*') if
                     'bin_label' not in f][0]

        else:
            raise NotImplementedError
        log.info(f'Loading state dict from {clf_path}.')
        clf.load_state_dict(torch.load(clf_path))
        clfs[modality] = clf.to(args.device)

    return clfs


if __name__ == '__main__':
    log.info('Starting classifier testing.')
    config = get_config()
    mimic_config_path = Path(os.getcwd()) / f'prepare/mimic_configs/{get_config_str()}.json'

    FLAGS = update_flags_with_config(mimic_config_path)
    FLAGS.dir_clf = Path(os.getcwd()) / f'data/clfs/{config["dir_clf"]}'
    FLAGS.reduce_lr_on_plateau = True
    FLAGS.fixed_extractor = True
    FLAGS.normalization = False
    FLAGS = expand_paths(FLAGS)
    use_cuda = torch.cuda.is_available()
    FLAGS.device = torch.device('cuda' if use_cuda else 'cpu')
    FLAGS.binary_labels = True
    FLAGS.img_clf_type = 'resnet'

    results = test_clfs(FLAGS, 128, 'word')
Example #15
# run with /home/hendrik/miniconda3/envs/mimic/bin/python prepare/run_prepare_loop.py

parent_dir = Path('/mnt/data/hendrik/mimic_scratch/mimic/moe/test_beta_bigsearch')
config_path = Path(os.getcwd()) / 'configs/bartholin.json'

experiment_df = pd.read_csv(Path(os.getcwd()) / 'data/experiments_dataframe.csv')

for experiment_dir in parent_dir.iterdir():

    experiment_uid = experiment_dir.name
    if experiment_uid in experiment_df['experiment_uid'].tolist():

        dest_dir = Path(os.getcwd()) / f'data/vae_model/{experiment_uid}'
        if not dest_dir.exists():
            symlink_command = f'ln -s {experiment_dir} {dest_dir}'
            log.info(f'Running {symlink_command}')
            os.system(symlink_command)
        with open(config_path, 'r') as json_file:
            config = json.load(json_file)
        config['experiment_dir'] = experiment_uid
        config['experiment_dir_bin'] = experiment_uid

        with open(config_path, 'w') as json_file:
            json.dump(config, json_file)

        make_cond_gen_fig()
        test_vae_gen()

        os.system('./prepare/run_loop.sh')
    else:
        print(f'{experiment_uid} is not found in experiment_df')
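The ln -s shell call in the loop above could also be done in pure Python; a minimal sketch with pathlib (the helper name is hypothetical):

from pathlib import Path

def ensure_symlink(experiment_dir: Path, dest_dir: Path) -> None:
    # Create dest_dir as a symlink pointing at experiment_dir, but only
    # if it does not exist yet, mirroring the `ln -s` call above.
    if not dest_dir.exists():
        dest_dir.symlink_to(experiment_dir)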