def test_Net3DArabidopsisOvules_forward(cache_path):
    """Run the UNet3DArabidopsisOvules model on a crop of its test input.

    Loads the model spec relative to this file, checks the declared output
    shape and test data entries, loads the weights on the CPU and compares
    the prediction for a 32x32x32 crop with the stored test output.
    """
    relative_spec = "../../bioimage-io/UNet3DArabidopsisOvules.model/UNet3DArabidopsisOvules.model.yaml"
    spec_path = (Path(__file__).parent / relative_spec).resolve()
    assert spec_path.exists(), spec_path

    pybio_model = load_model(str(spec_path), cache_path=cache_path)
    spec = pybio_model.spec

    # --- static checks on the loaded spec ---
    assert spec.outputs[0].shape.reference_input == "raw"
    assert spec.outputs[0].shape.scale == (1, 1, 1, 1, 1)
    assert spec.outputs[0].shape.offset == (0, 0, 0, 0, 0)
    assert isinstance(spec.prediction.weights.source, BytesIO)
    assert spec.test_input is not None
    assert spec.test_input.suffix == ".npy", spec.test_input.suffix
    assert spec.test_output is not None
    assert spec.test_output.suffix == ".npy", spec.test_output.suffix

    # --- model instantiation and weights ---
    model: torch.nn.Module = get_instance(pybio_model)
    assert isinstance(model, UNet3D)
    assert hasattr(model, "forward")
    state_dict = torch.load(spec.prediction.weights.source, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)

    pre_transformations = [get_instance(step) for step in spec.prediction.preprocess]
    post_transformations = [get_instance(step) for step in spec.prediction.postprocess]

    # --- test data ---
    ipt = numpy.load(str(spec.test_input))
    assert ipt.ndim == 5
    assert ipt.shape == spec.inputs[0].shape

    expected = numpy.load(str(spec.test_output))
    assert spec.outputs[0].shape.reference_input == spec.inputs[0].name
    assert all(s == 1 for s in spec.outputs[0].shape.scale)
    assert all(off == 0 for off in spec.outputs[0].shape.offset)
    assert expected.shape == spec.inputs[0].shape

    # to lower test mem consumption
    test_roi = (slice(0, 1),) * 2 + (slice(0, 32),) * 3
    ipt = ipt[test_roi]
    expected = expected[test_roi]

    # --- forward pass ---
    ipt = apply_transformations(pre_transformations, ipt)
    assert isinstance(ipt, list)
    assert len(ipt) == 1
    ipt = ipt[0]

    out = model.forward(ipt)
    out = apply_transformations(post_transformations, out)
    assert isinstance(out, list)
    assert len(out) == 1
    out = out[0]

    # The full output shape is not compared with the spec: test_roi makes
    # that check invalid.
    # NOTE(review): this writes "out.npy" into the current working directory
    # on every run; looks like a debugging leftover - confirm before removing.
    numpy.save("out.npy", out)
    assert str(out.dtype).split(".")[-1] == spec.outputs[0].data_type
    # test_roi requires bigger atol
    assert numpy.allclose(expected, out, atol=0.1)
def test_forward(cache_path):
    """Run the UNet3DPlatyCellProbs model on a crop of its test input.

    The prediction for a 32x32x32 crop is compared with a dedicated small
    reference output stored next to the regular test output.
    """
    spec_path = (
        Path(__file__).parent
        / "../../segmentation/cells/UNet3DPlatyCellProbs.model/UNet3DPlatyCellProbs.model.yaml"
    )
    assert spec_path.exists(), spec_path.absolute()

    pybio_model = load_model(str(spec_path), cache_path=cache_path)
    spec = pybio_model.spec
    assert spec.outputs[0].shape.reference_input == "raw"
    assert spec.outputs[0].shape.scale == (1, 1, 1, 1, 1)
    assert spec.outputs[0].shape.offset == (0, 0, 0, 0, 0)

    model: torch.nn.Module = get_instance(pybio_model)
    assert hasattr(model, "forward")
    assert isinstance(model, UNetAnisotropic)
    state_dict = torch.load(spec.prediction.weights.source, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)

    pre_transformations = [get_instance(step) for step in spec.prediction.preprocess]
    post_transformations = [get_instance(step) for step in spec.prediction.postprocess]

    # npz to ndarray: take the first array stored in the archive
    ipt_npz = numpy.load(str(spec.test_input))
    first_key = ipt_npz.files[0]
    ipt = [ipt_npz[first_key]]
    ipt_npz.close()

    assert len(ipt) == len(spec.inputs)
    assert isinstance(ipt, list)
    assert len(ipt) == 1
    ipt = ipt[0]
    assert ipt.shape == spec.inputs[0].shape

    # Don't test with the real test io, but a smaller test_roi.
    # Because the results differ due to edge effects, load the small test output instead
    # (this one is not linked to in the model.yaml)
    assert ipt.ndim == 5, ipt.shape
    # to lower test mem consumption
    test_roi = (slice(0, 1),) * 2 + (slice(0, 32),) * 3
    ipt = ipt[test_roi]

    small_output_path = str(spec.test_output).replace("test_output.npz", "test_output_small.npz")
    expected_npz = numpy.load(small_output_path)
    # npz to npy
    expected = expected_npz[expected_npz.files[0]]
    expected_npz.close()

    ipt = apply_transformations(pre_transformations, ipt)
    assert isinstance(ipt, list)
    assert len(ipt) == 1

    out = model.forward(*ipt)
    out = apply_transformations(post_transformations, out)
    assert isinstance(out, list)
    assert len(out) == 1
    out = out[0]

    # The full output shape is not compared with the spec: test_roi makes
    # that check invalid.
    assert str(out.dtype).split(".")[-1] == spec.outputs[0].data_type
    assert numpy.allclose(out, expected)
def classic_fit(pybio_model: Model, start: int = 0, batch_size: int = 1):
    """Fit a model on one sample batch, in the style of ``model.fit(X, y)``.

    Args:
        pybio_model: model description; instantiated with ``get_instance``.
        start: first element of the index passed to the training sampler.
        batch_size: second element of the index passed to the training sampler.

    Returns:
        The instantiated model after ``fit`` has been called on it.
    """
    fitted = get_instance(pybio_model)
    training_sampler = get_instance(pybio_model.spec.training.setup.sampler)

    # The sampler is indexed with a (start, batch_size) tuple and yields one
    # input/target pair; ``fit`` receives each wrapped in a one-element list.
    inputs, targets = training_sampler[start, batch_size]
    fitted.fit([inputs], [targets])
    return fitted
def test_BroadNucleusDataBinarized(cache_path):
    """Read a small region from the BroadNucleusDataBinarized reader and compare it with known values."""
    spec_path = Path(__file__).parent / "../../../specs/readers/BroadNucleusDataBinarized.reader.yaml"
    pybio_reader = load_spec_and_kwargs(str(spec_path), kwargs={}, cache_path=cache_path)
    reader = utils.get_instance(pybio_reader)
    assert isinstance(reader, BroadNucleusDataBinarized)

    # One tuple of slices per returned array: (2, 4, 6) for x, (3, 5, 7) for y.
    x_roi = (slice(2), slice(4), slice(6))
    y_roi = (slice(3), slice(5), slice(7))
    x, y = reader[(x_roi, y_roi)]

    expected_x = [
        [
            [138.0, 130.0, 140.0, 141.0, 138.0, 139.0],
            [134.0, 134.0, 135.0, 137.0, 139.0, 136.0],
            [127.0, 132.0, 140.0, 137.0, 130.0, 140.0],
            [131.0, 134.0, 136.0, 135.0, 141.0, 141.0],
        ],
        [
            [133.0, 136.0, 134.0, 134.0, 130.0, 135.0],
            [125.0, 132.0, 136.0, 133.0, 132.0, 127.0],
            [135.0, 132.0, 137.0, 127.0, 138.0, 131.0],
            [135.0, 129.0, 133.0, 138.0, 139.0, 133.0],
        ],
    ]
    assert numpy.equal(x, expected_x).all()

    expected_y = numpy.zeros((3, 5, 7), dtype=bool)
    assert numpy.equal(y, expected_y).all()
def test_load_specs_from_manifest(cache_path, category, spec_path):
    """Load a spec listed in the manifest and check that it can be instantiated.

    ``spec_path`` is resolved relative to the directory containing the manifest.
    """
    absolute_spec_path = MANIFEST_PATH.parent / spec_path
    assert absolute_spec_path.exists()

    resolved_spec = load_and_resolve_spec(str(absolute_spec_path))
    assert utils.get_instance(resolved_spec)
def test_load_specs_from_manifest(cache_path, category, spec_path, required_spec_kwargs):
    """Load a spec listed in the manifest, with its required kwargs, and instantiate it.

    Extra keyword arguments for a spec are looked up in ``required_spec_kwargs``
    under the manifest-relative ``spec_path``; specs without an entry get none.
    """
    # Look up the kwargs before spec_path is turned into an absolute path.
    extra_kwargs = required_spec_kwargs.get(spec_path, {})

    absolute_spec_path = MANIFEST_PATH.parent / spec_path
    assert absolute_spec_path.exists()

    loaded_spec = load_spec_and_kwargs(str(absolute_spec_path), **extra_kwargs, cache_path=cache_path)
    assert utils.get_instance(loaded_spec)
def eval_model_zip(model_zip: ZipFile, cache_path: Path):
    """Extract a zipped model into a temporary directory and instantiate it.

    Args:
        model_zip: archive containing the model spec and its files.
        cache_path: cache directory handed to ``load_model``; when ``None`` a
            ``cache`` folder inside the temporary directory is used.

    Returns:
        The object returned by ``get_instance`` for the loaded model.
    """
    with TemporaryDirectory() as tempdir:
        extraction_root = Path(tempdir)
        effective_cache = extraction_root / "cache" if cache_path is None else cache_path

        model_zip.extractall(extraction_root)

        # Only top-level entries of the archive are offered as candidates.
        top_level_entries = list(map(str, extraction_root.glob("*")))
        spec_file_str = guess_model_path(top_level_entries)

        pybio_model = load_model(spec_file_str, root_path=extraction_root, cache_path=effective_cache)
        return get_instance(pybio_model)
def __init__(
    self,
    *,
    pybio_model: nodes.Model,
    devices: List[str] = None,
):
    """Set up a TensorFlow model adapter from a model spec.

    Args:
        pybio_model: model spec; must describe exactly one input and one
            output and have ``framework == "tensorflow"``.
        devices: accepted for interface parity with other adapters but not
            used here; ``self.devices`` is always set to an empty list.

    Raises:
        NotImplementedError: if the spec does not have exactly one input and
            one output.
    """
    # NOTE: `devices` used to be declared as `devices=List[str]`, which made
    # the typing alias the default *value*. It is now a proper annotation with
    # a `None` default, so callers that omit it keep working.
    spec = pybio_model
    self.name = spec.name

    if len(spec.inputs) != 1 or len(spec.outputs) != 1:
        raise NotImplementedError("Only single input, single output models are supported")

    assert spec.framework == "tensorflow"

    _input = spec.inputs[0]
    _output = spec.outputs[0]

    # FIXME: TF probably uses different axis names
    self._internal_input_axes = _input.axes
    self._internal_output_axes = _output.axes

    # Public axes/shape exclude the leading batch axis when the model has one;
    # the matching transform adds it back before the data reaches the model.
    if has_batch_dim(self._internal_input_axes):
        self.input_axes = self._internal_input_axes[1:]
        self._input_batch_dimension_transform = _add_batch_dim
        _input_shape = _input.shape[1:]
    else:
        self.input_axes = self._internal_input_axes
        self._input_batch_dimension_transform = _noop
        _input_shape = _input.shape

    self.input_shape = list(zip(self.input_axes, _input_shape))

    # A missing halo is treated as zero along every output axis.
    _halo = _output.halo or [0 for _ in _output.axes]

    if has_batch_dim(self._internal_output_axes):
        self.output_axes = self._internal_output_axes[1:]
        self._output_batch_dimension_transform = _remove_batch_dim
        _halo = _halo[1:]
    else:
        self.output_axes = self._internal_output_axes
        self._output_batch_dimension_transform = _noop

    self.halo = list(zip(self.output_axes, _halo))

    self.model = get_instance(pybio_model)
    self.devices = []

    tf_model = tf.keras.models.load_model(spec.weights["tensorflow_saved_model_bundle"].source)
    self.model.set_model(tf_model)
def test_2sUNetDA(cache_path):
    """Run the 2sUNetDA model on its test input and compare with the stored test output.

    The model runs on CUDA when available and on the CPU otherwise.
    """
    spec_path = (Path(__file__).parent / "../2sUNetDA.model.yaml").resolve()
    assert spec_path.exists(), spec_path

    pybio_model = load_model(str(spec_path), cache_path=cache_path)
    spec = pybio_model.spec
    assert isinstance(spec.prediction.weights.source, BytesIO)
    assert spec.test_input is not None
    assert spec.test_output is not None

    model: torch.nn.Module = get_instance(pybio_model)
    use_cuda = torch.cuda.is_available()
    test_device = torch.device("cuda" if use_cuda else "cpu")
    if use_cuda:
        model = model.to(device=test_device)
    model.eval()

    state_dict = torch.load(spec.prediction.weights.source, map_location=test_device)
    model.load_state_dict(state_dict)

    pre_transformations = [get_instance(step) for step in spec.prediction.preprocess]
    post_transformations = [get_instance(step) for step in spec.prediction.postprocess]

    test_ipt = numpy.load(str(spec.test_input))
    test_out = numpy.load(str(spec.test_output))
    assert hasattr(model, "forward")

    preprocessed_inputs = apply_transformations(pre_transformations, test_ipt)
    assert isinstance(preprocessed_inputs, list)
    assert len(preprocessed_inputs) == 1

    # Move every preprocessed input to the device the model lives on.
    on_device = [tensor.to(test_device) for tensor in preprocessed_inputs]
    raw_out = model.forward(*on_device)

    postprocessed_outputs = apply_transformations(post_transformations, raw_out)
    assert isinstance(postprocessed_outputs, list)
    assert len(postprocessed_outputs) == 1

    assert numpy.allclose(test_out, postprocessed_outputs[0])
def convert_weights_to_onnx(
    model_yaml: Union[str, Path],
    output_path: Union[str, Path],
    opset_version: Union[int, None] = 12,
    use_tracing: bool = True,
    verbose: bool = True,
):
    """Convert model weights from format 'pytorch_state_dict' to 'onnx'.

    Arguments:
        model_yaml: location of the model.yaml file with bioimage.io spec
        output_path: where to save the onnx weights
        opset_version: onnx opset version
        use_tracing: whether to use tracing or scripting to export the onnx format
        verbose: be verbose during the onnx export

    Returns:
        0 if the exported model reproduces the pytorch output to 4 decimals,
        1 otherwise (a warning with the mismatch details is issued).

    Raises:
        NotImplementedError: if ``use_tracing`` is False.
    """
    # Reject the unsupported mode before any loading work is done.
    if not use_tracing:
        raise NotImplementedError

    # Hand plain strings to the exporter and to onnxruntime so that
    # pathlib.Path arguments work regardless of the library versions.
    output_path = str(output_path)

    spec = load_and_resolve_spec(model_yaml)

    with torch.no_grad():
        # load the test input
        input_data = np.load(spec.test_inputs[0]).astype('float32')
        input_tensor = torch.from_numpy(input_data)

        # instantiate and generate the expected output; the input tensor lives
        # on the CPU, so load the weights there even if they were saved from a GPU
        model = get_instance(spec)
        state = torch.load(spec.weights['pytorch_state_dict'].source, map_location="cpu")
        model.load_state_dict(state)
        expected_output = model(input_tensor).numpy()

        torch.onnx.export(model, input_tensor, output_path, verbose=verbose, opset_version=opset_version)

    # check the onnx model
    sess = rt.InferenceSession(output_path)
    input_name = sess.get_inputs()[0].name
    output = sess.run(None, {input_name: input_data})[0]

    try:
        assert_array_almost_equal(expected_output, output, decimal=4)
    except AssertionError as e:
        msg = f"The onnx weights were exported, but results before and after conversion do not agree:\n {str(e)}"
        warnings.warn(msg)
        return 1
    return 0
def __init__(
    self,
    *,
    pybio_model: nodes.Model,
    devices: Sequence[str] = None,
):
    """Instantiate a pytorch model from a spec and load its state dict.

    Args:
        pybio_model: model spec; instantiated with ``get_instance``.
        devices: device strings passed to ``torch.device``. The model is
            moved to the first one and the weights are mapped onto it.

    Raises:
        TypeError: if ``devices`` is not given.
    """
    # `devices` used to be declared as `devices=Sequence[str]`, i.e. the typing
    # alias was the default *value*. Omitting the argument then failed with an
    # obscure TypeError while iterating the alias; fail explicitly instead.
    if devices is None:
        raise TypeError("'devices' is required: pass a sequence of torch device strings")

    self._internal_output_axes = pybio_model.outputs[0].axes
    spec = pybio_model
    self.model = get_instance(pybio_model)

    self.devices = [torch.device(d) for d in devices]
    self.model.to(self.devices[0])
    assert isinstance(self.model, torch.nn.Module)

    # Weights are optional: without a "pytorch_state_dict" entry (or with an
    # empty source) the model keeps whatever parameters it was created with.
    weights = spec.weights.get("pytorch_state_dict")
    if weights is not None and weights.source:
        state = torch.load(weights.source, map_location=self.devices[0])
        self.model.load_state_dict(state)
def generate_output(path):
    """Run the model on its first test input and save the result as './test_output.npy'.

    Args:
        path: location of the model spec, passed to ``load_and_resolve_spec``.
    """
    spec = load_and_resolve_spec(path)

    with torch.no_grad():
        print("Loading inputs and outputs:")
        # load the test input as a float32 tensor
        raw_input = np.load(spec.test_inputs[0])
        input_data = torch.from_numpy(raw_input.astype('float32'))

        print("Predicting model")
        # instantiate the model and restore its weights
        model = get_instance(spec)
        weights_source = spec.weights['pytorch_state_dict'].source
        model.load_state_dict(torch.load(weights_source))

        # run the prediction
        prediction = model(input_data)
        output_data = prediction.numpy()

    # the output is required to have the same shape as the input
    assert output_data.shape == input_data.shape
    np.save('./test_output.npy', output_data)
def __init__(
    self,
    *,
    pybio_model: nodes.Model,
    devices: List[str] = None,
):
    """Instantiate a TensorFlow model from a spec and load its saved-model weights.

    Args:
        pybio_model: model spec; instantiated with ``get_instance``.
        devices: accepted for interface parity with other adapters but not
            used here; ``self.devices`` is always set to an empty list.
    """
    # `devices` used to be declared as `devices=List[str]`, which made the
    # typing alias the default *value*; it is now a proper annotation. A bare
    # `spec.inputs[0]` expression statement with no effect was also removed.
    spec = pybio_model
    self.name = spec.name

    _output = spec.outputs[0]
    # FIXME: TF probably uses different axis names
    self._internal_output_axes = _output.axes

    self.model = get_instance(pybio_model)
    self.devices = []

    tf_model = tf.keras.models.load_model(spec.weights["tensorflow_saved_model_bundle"].source)
    self.model.set_model(tf_model)
def convert_weights_to_torchscript(
    model_yaml: Union[str, Path],
    output_path: Union[str, Path],
    use_tracing: bool = True,
):
    """Convert model weights from format 'pytorch_state_dict' to 'torchscript'.

    Arguments:
        model_yaml: location of the model.yaml file with bioimage.io spec
        output_path: where to save the torchscript weights
        use_tracing: whether to use tracing (``torch.jit.trace``) or
            scripting (``torch.jit.script``) to create the torchscript model

    Returns:
        0 if the torchscript model reproduces the pytorch output to 4 decimals,
        1 otherwise (a warning with the mismatch details is issued).
    """
    spec = load_and_resolve_spec(model_yaml)

    with torch.no_grad():
        # load the test input
        input_data = np.load(spec.test_inputs[0]).astype('float32')
        input_data = torch.from_numpy(input_data)

        # instantiate model and load the weights; the input tensor lives on
        # the CPU, so map the weights there even if they were saved from a GPU
        model = get_instance(spec)
        state = torch.load(spec.weights['pytorch_state_dict'].source, map_location="cpu")
        model.load_state_dict(state)

        # get the expected output to validate the torchscript weights
        # (as a numpy array, like the scripted output it is compared with)
        expected_output = model(input_data).numpy()

        # make scripted model
        if use_tracing:
            scripted_model = torch.jit.trace(model, input_data)
        else:
            scripted_model = torch.jit.script(model)

        # check the scripted model
        output = scripted_model(input_data).numpy()

    # save the torchscript model; this happens even if the check below fails
    scripted_model.save(output_path)

    try:
        assert_array_almost_equal(expected_output, output, decimal=4)
    except AssertionError as e:
        # The message previously said "onnx" (copied from the onnx converter).
        msg = (
            "The torchscript weights were exported, but results before and after "
            f"conversion do not agree:\n {str(e)}"
        )
        warnings.warn(msg)
        return 1
    return 0
def simple_training(
    pybio_model: Model,
    n_iterations: int,
    batch_size: int,
    num_workers: int,
    out_file: Union[str, Path, IO[bytes]],
) -> torch.nn.Module:
    """Simplified training loop.

    Args:
        pybio_model: model description; the model and all training components
            (sampler, pre-/postprocessing, losses, optimizer) are instantiated
            from it with ``get_instance``.
        n_iterations: number of optimizer steps to perform.
        batch_size: batch size of the data loader.
        num_workers: number of data loader worker processes.
        out_file: path or binary file object the final state dict is saved to.
            For paths, missing parent directories are created.

    Returns:
        The trained model.
    """
    if isinstance(out_file, (str, Path)):
        out_file = Path(out_file)
        out_file.parent.mkdir(parents=True, exist_ok=True)

    model = get_instance(pybio_model)

    # instantiate all training parameters from the training config
    setup = pybio_model.spec.training.setup

    sampler = get_instance(setup.sampler)

    preprocess = [get_instance(prep) for prep in setup.preprocess]
    postprocess = [get_instance(post) for post in setup.postprocess]

    losses = [get_instance(loss_prep) for loss_prep in setup.losses]
    optimizer = get_instance(setup.optimizer, params=model.parameters())

    # build the data-loader from our sampler
    loader = DataLoader(sampler, shuffle=True, num_workers=num_workers, batch_size=batch_size)

    # run the training loop
    for _ in trange(n_iterations):
        # NOTE(review): a fresh iterator is created for every step, so each
        # step uses the first batch of a new shuffle (and restarts the
        # workers when num_workers > 0). Kept as in the original.
        x, y = next(iter(loader))
        optimizer.zero_grad()

        x, y = apply_transformations(preprocess, x, y)
        out = model(x)
        out, y = apply_transformations(postprocess, out, y)

        # Keep the computed values separate from the `losses` transformations:
        # rebinding `losses` here broke every iteration after the first.
        loss_values = apply_transformations(losses, out, y)
        ll = sum(loss_values)
        ll.backward()

        optimizer.step()

    # save model weights
    torch.save(model.state_dict(), out_file)
    return model
def check_model(path):
    """Check that the model reproduces its stored test output for its test input.

    Args:
        path: location of the model spec, passed to ``load_and_resolve_spec``.

    Raises:
        AssertionError: if the predicted output differs in shape from the
            expected output or is not close to it (``np.allclose``).
    """
    spec = load_and_resolve_spec(path)

    with torch.no_grad():
        print("Loading inputs and outputs:")
        # load the test input as a float32 tensor and the expected output as float32 array
        raw_input = np.load(spec.test_inputs[0])
        input_data = torch.from_numpy(raw_input.astype('float32'))
        expected_output_data = np.load(spec.test_outputs[0]).astype(np.float32)
        print(input_data.shape)

        print("Predicting model")
        # instantiate the model and restore its weights
        model = get_instance(spec)
        weights_source = spec.weights['pytorch_state_dict'].source
        model.load_state_dict(torch.load(weights_source))

        # run the prediction
        prediction = model(input_data)
        output_data = prediction.numpy()

    # compare the prediction with the stored reference
    assert output_data.shape == expected_output_data.shape
    assert np.allclose(expected_output_data, output_data)
    print("Check passed")
def __init__(
    self,
    *,
    pybio_model: nodes.Model,
    batch_size: int = 1,
    num_iterations_per_update: int = 2,
    _devices: Sequence[torch.device] = None,
):
    """Set up a model adapter from a single-input, single-output pytorch spec.

    Args:
        pybio_model: loaded model whose ``spec`` describes inputs, outputs,
            weights and prediction pre-/postprocessing.
        batch_size: not used in this constructor.
        num_iterations_per_update: not used in this constructor.
        _devices: devices to use; the model is moved to the first one and the
            weights are mapped onto it.

    Raises:
        TypeError: if ``_devices`` is not given.
        NotImplementedError: if the spec does not have exactly one input and
            one output, or if its framework is not "pytorch".
    """
    # `_devices` used to be declared as `_devices=Sequence[torch.device]`, i.e.
    # the typing alias was the default *value*. Omitting the argument then
    # failed with an obscure TypeError when it was indexed; fail explicitly.
    if _devices is None:
        raise TypeError("'_devices' is required: pass a sequence of torch devices")

    self.max_num_iterations = 0
    self.iteration_count = 0
    self.devices = _devices

    spec = pybio_model.spec
    self.name = spec.name

    if len(spec.inputs) != 1 or len(spec.outputs) != 1:
        raise NotImplementedError("Only single input, single output models are supported")

    _input = spec.inputs[0]
    _output = spec.outputs[0]

    self._internal_input_axes = _input.axes
    self._internal_output_axes = _output.axes

    # Public axes/shape exclude the leading batch axis when the model has one;
    # the matching transform adds it back before the data reaches the model.
    if _check_batch_dim(self._internal_input_axes):
        self.input_axes = self._internal_input_axes[1:]
        self._input_batch_dimension_transform = _add_batch_dim
        _input_shape = _input.shape[1:]
    else:
        self.input_axes = self._internal_input_axes
        self._input_batch_dimension_transform = _noop
        _input_shape = _input.shape

    self.input_shape = list(zip(self.input_axes, _input_shape))

    # A missing halo is treated as zero along every output axis.
    _halo = _output.halo or [0 for _ in _output.axes]

    if _check_batch_dim(self._internal_output_axes):
        self.output_axes = self._internal_output_axes[1:]
        self._output_batch_dimension_transform = _remove_batch_dim
        _halo = _halo[1:]
    else:
        self.output_axes = self._internal_output_axes
        self._output_batch_dimension_transform = _noop

    self.halo = list(zip(self.output_axes, _halo))

    self.model = get_instance(pybio_model)
    self.model.to(self.devices[0])

    if spec.framework == "pytorch":
        assert isinstance(self.model, torch.nn.Module)
        if spec.prediction.weights is not None:
            state = torch.load(spec.prediction.weights.source, map_location=self.devices[0])
            self.model.load_state_dict(state)
    else:
        raise NotImplementedError

    self._prediction_preprocess = make_concatenated_apply(
        [get_instance(tf) for tf in spec.prediction.preprocess]
    )
    self._prediction_postprocess = make_concatenated_apply(
        [get_instance(tf) for tf in spec.prediction.postprocess]
    )
def __init__(
    self,
    *,
    pybio_model: nodes.Model,
    devices: Sequence[str] = None,
):
    """Set up a pytorch model adapter from a single-input, single-output spec.

    Args:
        pybio_model: model spec describing inputs, outputs, weights and the
            named pre-/postprocessing steps.
        devices: device strings passed to ``torch.device``. The model is
            moved to the first one and the weights are mapped onto it.

    Raises:
        NotImplementedError: if the spec does not have exactly one input and
            one output, if its framework is not "pytorch", or if a
            pre-/postprocessing step name is not known.
        TypeError: if ``devices`` is not given for a pytorch model.
    """
    self._max_num_iterations = 0
    self._iteration_count = 0
    spec = pybio_model
    self.name = spec.name

    if len(spec.inputs) != 1 or len(spec.outputs) != 1:
        raise NotImplementedError("Only single input, single output models are supported")

    _input = spec.inputs[0]
    _output = spec.outputs[0]

    self._internal_input_axes = _input.axes
    self._internal_output_axes = _output.axes

    # Public axes/shape exclude the leading batch axis when the model has one;
    # the matching transform adds it back before the data reaches the model.
    if has_batch_dim(self._internal_input_axes):
        self.input_axes = self._internal_input_axes[1:]
        self._input_batch_dimension_transform = _add_batch_dim
        _input_shape = _input.shape[1:]
    else:
        self.input_axes = self._internal_input_axes
        self._input_batch_dimension_transform = _noop
        _input_shape = _input.shape

    self.input_shape = list(zip(self.input_axes, _input_shape))

    # A missing halo is treated as zero along every output axis.
    _halo = _output.halo or [0 for _ in _output.axes]

    if has_batch_dim(self._internal_output_axes):
        self.output_axes = self._internal_output_axes[1:]
        # NOTE(review): the output transform is `_noop` even when the output
        # has a batch axis, although the axis is dropped from `output_axes`
        # and the halo - confirm this is intended (no batch removal here).
        self._output_batch_dimension_transform = _noop
        _halo = _halo[1:]
    else:
        self.output_axes = self._internal_output_axes
        self._output_batch_dimension_transform = _noop

    self.halo = list(zip(self.output_axes, _halo))

    self.model = get_instance(pybio_model)
    if spec.framework == "pytorch":
        # `devices` used to be declared as `devices=Sequence[str]`, i.e. the
        # typing alias was the default *value*. Omitting the argument then
        # failed with an obscure TypeError while iterating the alias.
        if devices is None:
            raise TypeError("'devices' is required: pass a sequence of torch device strings")
        self.devices = [torch.device(d) for d in devices]
        self.model.to(self.devices[0])
        assert isinstance(self.model, torch.nn.Module)
        # Weights are optional: without a "pytorch_state_dict" entry (or with
        # an empty source) the model keeps its initial parameters.
        weights = spec.weights.get("pytorch_state_dict")
        if weights is not None and weights.source:
            state = torch.load(weights.source, map_location=self.devices[0])
            self.model.load_state_dict(state)
    else:
        # Only pytorch is supported here; tensorflow loading is not implemented.
        raise NotImplementedError

    # Preprocessing always starts with a cast to the declared input data type
    # and a conversion to torch, followed by the steps named in the spec.
    preprocessing_functions = [
        _make_cast(_input.data_type),
        _to_torch,
    ]
    for preprocessing_step in _input.preprocessing:
        fn = KNOWN_PREPROCESSING.get(preprocessing_step.name)
        if fn is None:
            raise NotImplementedError(f"Preprocessing {preprocessing_step.name}")
        preprocessing_functions.append(fn)

    self._prediction_preprocess = chain(*preprocessing_functions)

    postprocessing_functions = []
    for postprocessing_step in _output.postprocessing:
        fn = KNOWN_POSTPROCESSING.get(postprocessing_step.name)
        if fn is None:
            raise NotImplementedError(f"Postprocessing {postprocessing_step.name}")
        postprocessing_functions.append(fn)

    self._prediction_postprocess = chain(*postprocessing_functions)