Example #1
 def test_language_models(self, input_output_tmaps, tmpdir):
     params = DEFAULT_PARAMS.copy()
     m = make_multimodal_multitask_model(
         tensor_maps_in=input_output_tmaps[0],
         tensor_maps_out=input_output_tmaps[1],
         **params)
     assert_model_trains(input_output_tmaps[0], input_output_tmaps[1], m)
     path = os.path.join(tmpdir, f'lstm{MODEL_EXT}')
     m.save(path)
     params['model_file'] = path
     make_multimodal_multitask_model(
         input_output_tmaps[0],
         input_output_tmaps[1],
         **params,
     )
Example #2
 def test_load_unimodal(self, tmpdir, input_tmap, output_tmap):
     params = DEFAULT_PARAMS.copy()
     m = make_multimodal_multitask_model(
         [input_tmap],
         [output_tmap],
         **params,
     )
     path = os.path.join(tmpdir, f'm{MODEL_EXT}')
     m.save(path)
     params['model_file'] = path
     make_multimodal_multitask_model(
         [input_tmap],
         [output_tmap],
         **params,
     )
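The load tests in this set only assert that rebuilding from model_file does not raise. A stricter round-trip check, sketched here as an assumption rather than part of the actual suite, would capture the second call's return value and compare weights with the original model:

import numpy as np

def assert_same_weights(original, reloaded):
    # Hypothetical helper: a reloaded model should reproduce the saved weights exactly.
    for w_orig, w_loaded in zip(original.get_weights(), reloaded.get_weights()):
        np.testing.assert_array_equal(w_orig, w_loaded)

In the test above, original would be m and reloaded the model returned by the second make_multimodal_multitask_model call.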
Example #3
def assert_model_trains(input_tmaps: List[TensorMap],
                        output_tmaps: List[TensorMap],
                        m: Optional[tf.keras.Model] = None,
                        skip_shape_check: bool = False):
    if m is None:
        m = make_multimodal_multitask_model(
            input_tmaps,
            output_tmaps,
            **DEFAULT_PARAMS,
        )
    if not skip_shape_check:
        for tmap, tensor in zip(input_tmaps, m.inputs):
            assert tensor.shape[1:] == tmap.shape
        for tmap, tensor in zip(parent_sort(output_tmaps), m.outputs):
            assert tensor.shape[1:] == tmap.shape
    data = make_training_data(input_tmaps, output_tmaps)
    history = m.fit(data,
                    steps_per_epoch=2,
                    epochs=2,
                    validation_data=data,
                    validation_steps=2)
    for tmap in output_tmaps:
        for metric in tmap.metrics:
            metric_name = metric if isinstance(metric, str) else metric.__name__
            if len(output_tmaps) > 1:
                name = f'{tmap.output_name()}_{metric_name}'
            else:
                name = metric_name
            assert name in history.history
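assert_model_trains depends on a make_training_data helper that is not shown in these examples. A minimal sketch of such a helper, under the assumption that TensorMap exposes shape, input_name() and output_name() (the latter is used in the metric check above), could look like this:

import numpy as np
import tensorflow as tf

def make_training_data(input_tmaps, output_tmaps, batch_size=2):
    # Hypothetical sketch: random tensors shaped by the TensorMaps,
    # wrapped in an endlessly repeating tf.data.Dataset for m.fit().
    # Categorical outputs would need valid one-hot targets in a real test.
    inputs = {t.input_name(): np.random.random((batch_size,) + tuple(t.shape)).astype(np.float32)
              for t in input_tmaps}
    outputs = {t.output_name(): np.random.random((batch_size,) + tuple(t.shape)).astype(np.float32)
               for t in output_tmaps}
    return tf.data.Dataset.from_tensors((inputs, outputs)).repeat()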
Example #4
 def test_load_multimodal(self, tmpdir, input_tmaps: List[TensorMap],
                          output_tmaps: List[TensorMap]):
     m = make_multimodal_multitask_model(
         input_tmaps,
         output_tmaps,
         **DEFAULT_PARAMS,
     )
     path = os.path.join(tmpdir, f'm{MODEL_EXT}')
     m.save(path)
     params = DEFAULT_PARAMS.copy()
     params['model_file'] = path
     make_multimodal_multitask_model(
         input_tmaps,
         output_tmaps,
         **params,
     )
Example #5
 def test_load_custom_activations(self, tmpdir, activation):
     inp, out = CONTINUOUS_TMAPS[:2], CATEGORICAL_TMAPS[:2]
     params = DEFAULT_PARAMS.copy()
     params['activation'] = activation
     m = make_multimodal_multitask_model(
         inp,
         out,
         **params,
     )
     path = os.path.join(tmpdir, f'm{MODEL_EXT}')
     m.save(path)
     params['model_file'] = path
     make_multimodal_multitask_model(
         inp,
         out,
         **params,
     )
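Reloading a model that uses non-standard activations generally requires telling Keras about them. Presumably the model_file path handles this internally; a plain-Keras sketch of the idea (an assumption about the mechanism, not the library's actual code):

import tensorflow as tf

def load_with_custom_objects(path, custom_activations):
    # custom_activations maps activation names to callables,
    # e.g. {'swish': tf.keras.activations.swish}. Hypothetical helper.
    return tf.keras.models.load_model(path, custom_objects=custom_activations)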
Example #6
 def test_no_dense_layers(self):
     params = DEFAULT_PARAMS.copy()
     params['dense_layers'] = []
     inp, out = CONTINUOUS_TMAPS[:2], CATEGORICAL_TMAPS[:2]
     m = make_multimodal_multitask_model(
         inp,
         out,
          **params,
     )
     assert_model_trains(inp, out, m)
Example #7
 def test_u_connect_segment(self):
     params = DEFAULT_PARAMS.copy()
     params['pool_x'] = params['pool_y'] = 2
     params['u_connect'] = defaultdict(set, {SEGMENT_IN: {SEGMENT_OUT}})
     m = make_multimodal_multitask_model(
         [SEGMENT_IN],
         [SEGMENT_OUT],
         **params,
     )
     assert_model_trains([SEGMENT_IN], [SEGMENT_OUT], m)
Example #8
 def test_u_connect_no_bottleneck(self):
     params = DEFAULT_PARAMS.copy()
     params['pool_x'] = params['pool_y'] = 2
     params['bottleneck_type'] = BottleneckType.NoBottleNeck
     params['u_connect'] = defaultdict(set, {SEGMENT_IN: {SEGMENT_OUT}})
     m = make_multimodal_multitask_model(
         [SEGMENT_IN, TMAPS_UP_TO_4D[0]],
         [SEGMENT_OUT],
         **params,
     )
     assert_model_trains([SEGMENT_IN, TMAPS_UP_TO_4D[0]], [SEGMENT_OUT], m)
Example #9
 def test_u_connect_adaptive_normalization(self):
     params = DEFAULT_PARAMS.copy()
     params['pool_x'] = params['pool_y'] = 2
     params['bottleneck_type'] = BottleneckType.GlobalAveragePoolStructured
     params['u_connect'] = defaultdict(set, {SEGMENT_IN: {SEGMENT_OUT}})
     m = make_multimodal_multitask_model(
         [SEGMENT_IN, TMAPS_UP_TO_4D[0]],
         [SEGMENT_OUT],
         **params,
     )
     assert_model_trains([SEGMENT_IN, TMAPS_UP_TO_4D[0]], [SEGMENT_OUT], m)
Example #10
 def test_multimodal_multitask_variational(self, input_output_tmaps,
                                           tmpdir):
     """
     Tests 1d->2d, 2d->1d, (1d,2d)->(1d,2d)
     """
     params = DEFAULT_PARAMS.copy()
     params['bottleneck_type'] = BottleneckType.Variational
     params['pool_x'] = params['pool_y'] = 2
     m = make_multimodal_multitask_model(input_output_tmaps[0],
                                         input_output_tmaps[1], **params)
     assert_model_trains(input_output_tmaps[0], input_output_tmaps[1], m)
     m.save(os.path.join(tmpdir, 'vae.h5'))
     path = os.path.join(tmpdir, f'm{MODEL_EXT}')
     m.save(path)
     params['model_file'] = path
     make_multimodal_multitask_model(
         input_output_tmaps[0],
         input_output_tmaps[1],
         **params,
     )
Example #11
 def test_u_connect_auto_encode(self):
     params = DEFAULT_PARAMS.copy()
     params['pool_x'] = params['pool_y'] = 2
     params['conv_layers'] = [8, 8]
     params['dense_blocks'] = [4, 4, 2]
     params['u_connect'] = defaultdict(set, {SEGMENT_IN: {SEGMENT_IN}})
     m = make_multimodal_multitask_model(
         [SEGMENT_IN],
         [SEGMENT_IN],
         **params,
     )
     assert_model_trains([SEGMENT_IN], [SEGMENT_IN], m)
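Across the u_connect tests (Examples #7, #8, #9, and #11), u_connect is built the same way: a defaultdict(set) that maps each input TensorMap to the set of output TensorMaps receiving U-Net style skip connections from it. An equivalent incremental construction of the two mappings used in these tests (variable names here are illustrative only):

from collections import defaultdict

# Segmentation with skip connections (Examples #7-#9):
u_connect_segment = defaultdict(set)
u_connect_segment[SEGMENT_IN].add(SEGMENT_OUT)

# Auto-encoding, where the input reconstructs itself (Example #11):
u_connect_autoencode = defaultdict(set)
u_connect_autoencode[SEGMENT_IN].add(SEGMENT_IN)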
Example #12
    def loss_from_multimodal_multitask(x):
        model = None
        history = None
        nonlocal i
        i += 1
        try:
            set_args_from_x(args, x)
            model = make_multimodal_multitask_model(**args.__dict__)

            if model.count_params() > args.max_parameters:
                logging.info(
                    f"Model too big, max parameters is:{args.max_parameters}, model has:{model.count_params()}. Return max loss."
                )
                return MAX_LOSS
            generate_train, generate_valid, _ = test_train_valid_tensor_generators(
                **args.__dict__)
            model, history = train_model_from_generators(
                model,
                generate_train,
                generate_valid,
                args.training_steps,
                args.validation_steps,
                args.batch_size,
                args.epochs,
                args.patience,
                args.output_folder,
                args.id,
                args.inspect_model,
                args.inspect_show_labels,
                True,
                False,
            )
            history.history['parameter_count'] = [model.count_params()]
            histories.append(history.history)
            title = f'trial_{i}'  # refer to loss_by_params.txt to find the params for this trial
            plot_metric_history(history, args.training_steps, title, fig_path)
            model.load_weights(
                os.path.join(args.output_folder, args.id, args.id + MODEL_EXT))
            loss_and_metrics = model.evaluate(test_data,
                                              test_labels,
                                              batch_size=args.batch_size)
            logging.info(
                f'Current architecture:\n{string_from_arch_dict(x)}\nCurrent model size: {model.count_params()}.'
            )
            logging.info(
                f"Iteration {i} out of maximum {args.max_models}\nTest Loss: {loss_and_metrics[0]}"
            )
            generate_train.kill_workers()
            generate_valid.kill_workers()
            return loss_and_metrics[0]

        except ValueError:
            logging.exception(
                'ValueError trying to make a model for hyperparameter optimization. Returning max loss.'
            )
            return MAX_LOSS
        except Exception:
            logging.exception(
                'Error trying hyperparameter optimization. Returning max loss.'
            )
            return MAX_LOSS
        finally:
            del model
            gc.collect()
            if history is None:
                histories.append({
                    'loss': [MAX_LOSS],
                    'val_loss': [MAX_LOSS],
                    'parameter_count': [0]
                })
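loss_from_multimodal_multitask returns a scalar loss (or MAX_LOSS on any failure), which is the kind of objective black-box optimizers expect. A hedged sketch of a driver, assuming a scikit-optimize style search space; the real ml4h driver and the exact vector x consumed by set_args_from_x may differ:

from skopt import gp_minimize
from skopt.space import Integer, Real

# Hypothetical search space; the real dimensions depend on set_args_from_x.
space = [
    Integer(1, 6, name='num_dense_layers'),
    Real(1e-5, 1e-2, prior='log-uniform', name='learning_rate'),
]
result = gp_minimize(loss_from_multimodal_multitask, space, n_calls=args.max_models)
logging.info(f'Best loss {result.fun} with parameters {result.x}')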
Example #13
    def test_brain_seg(self, tmpdir):
        tensor_path = '/mnt/disks/brains-all-together/2020-02-11/'
        if not os.path.exists(tensor_path):
            pytest.skip(
                'To test brain segmentation performance, attach disk brains-all-together'
            )

        from ml4h.tensor_from_file import TMAPS
        from ml4h.tensor_generators import test_train_valid_tensor_generators, big_batch_from_minibatch_generator
        from multiprocessing import cpu_count
        from sklearn.metrics import average_precision_score

        tmaps_in = [TMAPS['t1_30_slices_4d']]
        tmaps_out = [TMAPS['t1_seg_30_slices']]
        m = make_multimodal_multitask_model(
            tensor_maps_in=tmaps_in,
            tensor_maps_out=tmaps_out,
            activation='relu',
            learning_rate=1e-3,
            bottleneck_type=BottleneckType.GlobalAveragePoolStructured,
            optimizer='radam',
            dense_layers=[16, 64],
            conv_layers=[32],
            dense_blocks=[32, 24, 16],
            block_size=3,
            conv_type='conv',
            conv_x=[3],
            conv_y=[3],
            conv_z=[2],
            pool_x=2,
            pool_y=2,
            pool_z=1,
            pool_type='max',
            u_connect=defaultdict(set, {tmaps_in[0]: {tmaps_out[0]}}),
        )
        batch_size = 2
        generate_train, generate_valid, generate_test = test_train_valid_tensor_generators(
            tmaps_in,
            tmaps_out,
            tensors=tensor_path,
            batch_size=batch_size,
            valid_ratio=.2,
            test_ratio=.2,
            num_workers=cpu_count(),
            cache_size=1e9 / cpu_count(),
            balance_csvs=[],
            training_steps=64,
            validation_steps=18,
            test_modulo=0,
        )
        try:
            m = train_model_from_generators(
                model=m,
                generate_train=generate_train,
                generate_valid=generate_valid,
                training_steps=64,
                validation_steps=18,
                epochs=24,
                patience=22,
                batch_size=batch_size,
                output_folder=str(tmpdir),
                run_id='brain_seg_test',
                inspect_model=True,
                inspect_show_labels=True,
            )
            test_data, test_labels, test_paths = big_batch_from_minibatch_generator(
                generate_test,
                12,
            )
        finally:
            generate_train.kill_workers()
            generate_test.kill_workers()
            generate_valid.kill_workers()
        y_prediction = m.predict(test_data, batch_size=batch_size)
        y_truth = np.array(test_labels[tmaps_out[0].output_name()])
        expected_precisions = {
            'not_brain_tissue': 1.,
            'csf': .921,
            'grey': .963,
            'white': .989,
        }
        actual_precisions = {}
        for name, idx in tmaps_out[0].channel_map.items():
            average_precision = average_precision_score(
                y_truth[..., idx].flatten(),
                y_prediction[..., idx].flatten(),
            )
            actual_precisions[name] = average_precision
        for name in expected_precisions:
            assert actual_precisions[name] >= expected_precisions[name] - MEAN_PRECISION_EPS
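The final check flattens the voxel-wise one-hot truth and the predicted probabilities for each channel and scores them with scikit-learn's average_precision_score. A toy illustration of the same call on invented values:

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([0, 0, 1, 1])             # flattened binary mask for one channel
y_scores = np.array([0.1, 0.4, 0.35, 0.8])  # flattened predicted probabilities
print(average_precision_score(y_true, y_scores))  # ~0.83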