def test_language_models(self, input_output_tmaps, tmpdir):
    params = DEFAULT_PARAMS.copy()
    m = make_multimodal_multitask_model(
        tensor_maps_in=input_output_tmaps[0],
        tensor_maps_out=input_output_tmaps[1],
        **params,
    )
    assert_model_trains(input_output_tmaps[0], input_output_tmaps[1], m)
    path = os.path.join(tmpdir, f'lstm{MODEL_EXT}')
    m.save(path)
    params['model_file'] = path
    make_multimodal_multitask_model(
        input_output_tmaps[0], input_output_tmaps[1], **params,
    )
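# `input_output_tmaps` arrives via pytest parametrization pairing input and output
# TensorMap lists. A sketch of how such a parametrization might look, using this
# module's CONTINUOUS_TMAPS/CATEGORICAL_TMAPS; the exact pairs are illustrative
# assumptions, not the real decorator:
#
# @pytest.mark.parametrize(
#     'input_output_tmaps',
#     [
#         (CONTINUOUS_TMAPS[:1], CATEGORICAL_TMAPS[:1]),
#         (CONTINUOUS_TMAPS[:2], CATEGORICAL_TMAPS[:2]),
#     ],
# )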
def test_load_unimodal(self, tmpdir, input_tmap, output_tmap):
    params = DEFAULT_PARAMS.copy()
    m = make_multimodal_multitask_model(
        [input_tmap], [output_tmap], **params,
    )
    path = os.path.join(tmpdir, f'm{MODEL_EXT}')
    m.save(path)
    # Pass the copied params (not DEFAULT_PARAMS) so the saved model is actually loaded.
    params['model_file'] = path
    make_multimodal_multitask_model(
        [input_tmap], [output_tmap], **params,
    )
def assert_model_trains(
    input_tmaps: List[TensorMap],
    output_tmaps: List[TensorMap],
    m: Optional[tf.keras.Model] = None,
    skip_shape_check: bool = False,
):
    if m is None:
        m = make_multimodal_multitask_model(
            input_tmaps, output_tmaps, **DEFAULT_PARAMS,
        )
    if not skip_shape_check:
        # Model tensors carry a leading batch dimension; compare the rest to each tmap shape.
        for tmap, tensor in zip(input_tmaps, m.inputs):
            assert tensor.shape[1:] == tmap.shape
        for tmap, tensor in zip(parent_sort(output_tmaps), m.outputs):
            assert tensor.shape[1:] == tmap.shape
    data = make_training_data(input_tmaps, output_tmaps)
    history = m.fit(data, steps_per_epoch=2, epochs=2, validation_data=data, validation_steps=2)
    for tmap in output_tmaps:
        for metric in tmap.metrics:
            metric_name = metric if isinstance(metric, str) else metric.__name__
            # Keras prefixes metric names with the output name when there are multiple outputs.
            name = f'{tmap.output_name()}_{metric_name}' if len(output_tmaps) > 1 else metric_name
            assert name in history.history
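# `make_training_data` is defined elsewhere in this module. A minimal sketch of the
# idea, assuming it builds an infinitely repeating tf.data.Dataset of random tensors
# keyed by tmap input/output names; the body below is an illustrative assumption,
# not the real helper (np and tf are the module-level numpy/tensorflow imports):
def _make_training_data_sketch(input_tmaps, output_tmaps, batch_size=4):
    ins = {
        t.input_name(): np.random.random((batch_size,) + t.shape).astype(np.float32)
        for t in input_tmaps
    }
    outs = {
        t.output_name(): np.random.random((batch_size,) + t.shape).astype(np.float32)
        for t in output_tmaps
    }
    # repeat() lets m.fit draw steps_per_epoch batches per epoch indefinitely
    return tf.data.Dataset.from_tensor_slices((ins, outs)).batch(batch_size).repeat()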
def test_load_multimodal(self, tmpdir, input_tmaps: List[TensorMap], output_tmaps: List[TensorMap]):
    m = make_multimodal_multitask_model(
        input_tmaps, output_tmaps, **DEFAULT_PARAMS,
    )
    path = os.path.join(tmpdir, f'm{MODEL_EXT}')
    m.save(path)
    params = DEFAULT_PARAMS.copy()
    params['model_file'] = path
    make_multimodal_multitask_model(
        input_tmaps, output_tmaps, **params,
    )
def test_load_custom_activations(self, tmpdir, activation):
    inp, out = CONTINUOUS_TMAPS[:2], CATEGORICAL_TMAPS[:2]
    params = DEFAULT_PARAMS.copy()
    params['activation'] = activation
    m = make_multimodal_multitask_model(
        inp, out, **params,
    )
    path = os.path.join(tmpdir, f'm{MODEL_EXT}')
    m.save(path)
    params['model_file'] = path
    make_multimodal_multitask_model(
        inp, out, **params,
    )
def test_no_dense_layers(self):
    params = DEFAULT_PARAMS.copy()
    params['dense_layers'] = []
    inp, out = CONTINUOUS_TMAPS[:2], CATEGORICAL_TMAPS[:2]
    # Pass the modified params so the empty dense_layers setting is actually exercised.
    m = make_multimodal_multitask_model(
        inp, out, **params,
    )
    assert_model_trains(inp, out, m)
def test_u_connect_segment(self):
    params = DEFAULT_PARAMS.copy()
    params['pool_x'] = params['pool_y'] = 2
    params['u_connect'] = defaultdict(set, {SEGMENT_IN: {SEGMENT_OUT}})
    m = make_multimodal_multitask_model(
        [SEGMENT_IN], [SEGMENT_OUT], **params,
    )
    assert_model_trains([SEGMENT_IN], [SEGMENT_OUT], m)
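# `u_connect` maps each input TensorMap to the set of output TensorMaps that should
# receive U-Net-style skip connections from its encoder. A hypothetical fan-out to
# two segmentation outputs would look like this (SEGMENT_OUT_2 is illustrative and
# not defined in this module):
#
#     u_connect = defaultdict(set, {SEGMENT_IN: {SEGMENT_OUT, SEGMENT_OUT_2}})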
def test_u_connect_no_bottleneck(self):
    params = DEFAULT_PARAMS.copy()
    params['pool_x'] = params['pool_y'] = 2
    params['bottleneck_type'] = BottleneckType.NoBottleNeck
    params['u_connect'] = defaultdict(set, {SEGMENT_IN: {SEGMENT_OUT}})
    m = make_multimodal_multitask_model(
        [SEGMENT_IN, TMAPS_UP_TO_4D[0]], [SEGMENT_OUT], **params,
    )
    assert_model_trains([SEGMENT_IN, TMAPS_UP_TO_4D[0]], [SEGMENT_OUT], m)
def test_u_connect_adaptive_normalization(self):
    params = DEFAULT_PARAMS.copy()
    params['pool_x'] = params['pool_y'] = 2
    params['bottleneck_type'] = BottleneckType.GlobalAveragePoolStructured
    params['u_connect'] = defaultdict(set, {SEGMENT_IN: {SEGMENT_OUT}})
    m = make_multimodal_multitask_model(
        [SEGMENT_IN, TMAPS_UP_TO_4D[0]], [SEGMENT_OUT], **params,
    )
    assert_model_trains([SEGMENT_IN, TMAPS_UP_TO_4D[0]], [SEGMENT_OUT], m)
def test_multimodal_multitask_variational(self, input_output_tmaps, tmpdir):
    """
    Tests 1d->2d, 2d->1d, (1d,2d)->(1d,2d)
    """
    params = DEFAULT_PARAMS.copy()
    params['bottleneck_type'] = BottleneckType.Variational
    params['pool_x'] = params['pool_y'] = 2
    m = make_multimodal_multitask_model(input_output_tmaps[0], input_output_tmaps[1], **params)
    assert_model_trains(input_output_tmaps[0], input_output_tmaps[1], m)
    m.save(os.path.join(tmpdir, 'vae.h5'))
    path = os.path.join(tmpdir, f'm{MODEL_EXT}')
    m.save(path)
    params['model_file'] = path
    make_multimodal_multitask_model(
        input_output_tmaps[0], input_output_tmaps[1], **params,
    )
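# BottleneckType.Variational swaps the deterministic bottleneck for a sampled latent.
# A generic VAE reparameterization sketch of what such a bottleneck computes (the
# names mu/log_var are illustrative, not ml4h internals):
#
#     z = mu + tf.exp(0.5 * log_var) * tf.random.normal(tf.shape(mu))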
def test_u_connect_auto_encode(self):
    params = DEFAULT_PARAMS.copy()
    params['pool_x'] = params['pool_y'] = 2
    params['conv_layers'] = [8, 8]
    params['dense_blocks'] = [4, 4, 2]
    # Auto-encode: the same TensorMap is both input and output, with a skip to itself.
    params['u_connect'] = defaultdict(set, {SEGMENT_IN: {SEGMENT_IN}})
    m = make_multimodal_multitask_model(
        [SEGMENT_IN], [SEGMENT_IN], **params,
    )
    assert_model_trains([SEGMENT_IN], [SEGMENT_IN], m)
def loss_from_multimodal_multitask(x):
    # Closure over `args`, `i`, `histories`, `fig_path`, `test_data`, and
    # `test_labels` from the enclosing optimization driver.
    model = None
    history = None
    nonlocal i
    i += 1
    try:
        set_args_from_x(args, x)
        model = make_multimodal_multitask_model(**args.__dict__)
        if model.count_params() > args.max_parameters:
            logging.info(
                f"Model too big, max parameters is: {args.max_parameters}, "
                f"model has: {model.count_params()}. Returning max loss.",
            )
            return MAX_LOSS
        generate_train, generate_valid, _ = test_train_valid_tensor_generators(**args.__dict__)
        model, history = train_model_from_generators(
            model, generate_train, generate_valid, args.training_steps, args.validation_steps,
            args.batch_size, args.epochs, args.patience, args.output_folder, args.id,
            args.inspect_model, args.inspect_show_labels, True, False,
        )
        history.history['parameter_count'] = [model.count_params()]
        histories.append(history.history)
        title = f'trial_{i}'  # refer to loss_by_params.txt to find the params for this trial
        plot_metric_history(history, args.training_steps, title, fig_path)
        model.load_weights(os.path.join(args.output_folder, args.id, args.id + MODEL_EXT))
        loss_and_metrics = model.evaluate(test_data, test_labels, batch_size=args.batch_size)
        logging.info(f'Current architecture:\n{string_from_arch_dict(x)}\nCurrent model size: {model.count_params()}.')
        logging.info(f"Iteration {i} out of maximum {args.max_models}\nTest Loss: {loss_and_metrics[0]}")
        generate_train.kill_workers()
        generate_valid.kill_workers()
        return loss_and_metrics[0]
    except ValueError:
        logging.exception('ValueError trying to make a model for hyperparameter optimization. Returning max loss.')
        return MAX_LOSS
    except Exception:
        logging.exception('Error trying hyperparameter optimization. Returning max loss.')
        return MAX_LOSS
    finally:
        del model
        gc.collect()
        if history is None:
            # Failed trials still get a placeholder entry so trial indices stay aligned.
            histories.append({'loss': [MAX_LOSS], 'val_loss': [MAX_LOSS], 'parameter_count': [0]})
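# The closure above is handed to a black-box optimizer that proposes architecture
# choices `x` and minimizes the returned test loss. A minimal sketch of that wiring,
# assuming hyperopt's fmin drives the search (consistent with `x` arriving as a dict
# per string_from_arch_dict); the search-space dimensions below are illustrative
# assumptions, and set_args_from_x is expected to consume the same keys:
def _hyperoptimize_sketch(loss_fn, max_evals=10):
    from hyperopt import fmin, hp, tpe

    space = {
        'num_dense_layers': hp.quniform('num_dense_layers', 1, 6, 1),      # hypothetical dimension
        'dense_layer_units': hp.quniform('dense_layer_units', 8, 256, 8),  # hypothetical dimension
    }
    # fmin samples a dict from `space`, passes it to loss_fn, and minimizes the loss
    return fmin(fn=loss_fn, space=space, algo=tpe.suggest, max_evals=max_evals)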
def test_brain_seg(self, tmpdir):
    tensor_path = '/mnt/disks/brains-all-together/2020-02-11/'
    if not os.path.exists(tensor_path):
        pytest.skip('To test brain segmentation performance, attach disk brains-all-together')
    from ml4h.tensor_from_file import TMAPS
    from ml4h.tensor_generators import test_train_valid_tensor_generators, big_batch_from_minibatch_generator
    from multiprocessing import cpu_count
    from sklearn.metrics import average_precision_score
    tmaps_in = [TMAPS['t1_30_slices_4d']]
    tmaps_out = [TMAPS['t1_seg_30_slices']]
    m = make_multimodal_multitask_model(
        tensor_maps_in=tmaps_in,
        tensor_maps_out=tmaps_out,
        activation='relu',
        learning_rate=1e-3,
        bottleneck_type=BottleneckType.GlobalAveragePoolStructured,
        optimizer='radam',
        dense_layers=[16, 64],
        conv_layers=[32],
        dense_blocks=[32, 24, 16],
        block_size=3,
        conv_type='conv',
        conv_x=[3], conv_y=[3], conv_z=[2],
        pool_x=2, pool_y=2, pool_z=1,
        pool_type='max',
        u_connect=defaultdict(set, {tmaps_in[0]: {tmaps_out[0]}}),
    )
    batch_size = 2
    generate_train, generate_valid, generate_test = test_train_valid_tensor_generators(
        tmaps_in, tmaps_out,
        tensors=tensor_path,
        batch_size=batch_size,
        valid_ratio=.2,
        test_ratio=.2,
        num_workers=cpu_count(),
        cache_size=1e9 / cpu_count(),
        balance_csvs=[],
        training_steps=64,
        validation_steps=18,
        test_modulo=0,
    )
    try:
        m = train_model_from_generators(
            model=m,
            generate_train=generate_train,
            generate_valid=generate_valid,
            training_steps=64,
            validation_steps=18,
            epochs=24,
            patience=22,
            batch_size=batch_size,
            output_folder=str(tmpdir),
            run_id='brain_seg_test',
            inspect_model=True,
            inspect_show_labels=True,
        )
        test_data, test_labels, test_paths = big_batch_from_minibatch_generator(generate_test, 12)
    finally:
        generate_train.kill_workers()
        generate_test.kill_workers()
        generate_valid.kill_workers()
    y_prediction = m.predict(test_data, batch_size=batch_size)
    y_truth = np.array(test_labels[tmaps_out[0].output_name()])
    # Per-class average precision must stay within MEAN_PRECISION_EPS of these baselines.
    expected_precisions = {
        'not_brain_tissue': 1.,
        'csf': .921,
        'grey': .963,
        'white': .989,
    }
    actual_precisions = {}
    for name, idx in tmaps_out[0].channel_map.items():
        actual_precisions[name] = average_precision_score(
            y_truth[..., idx].flatten(),
            y_prediction[..., idx].flatten(),
        )
    for name in expected_precisions:
        assert actual_precisions[name] >= expected_precisions[name] - MEAN_PRECISION_EPS