Exemple #1
0
class Model(BaseSpec):
    """Model schema built on `BaseSpec`: prediction settings, arrays and free-form dicts."""

    # Nested sub-schema holding the prediction settings.
    prediction = fields.Nested(Prediction)

    # Input/output arrays are declared as nested schemas with many=True.
    inputs = fields.Nested(
        InputArray,
        many=True,
    )
    outputs = fields.Nested(
        OutputArray,
        many=True,
    )

    # Untyped dict; None is the declared `missing` value.
    training = fields.Dict(missing=None)

    # Untyped dict; `dict` is the declared `missing` factory.
    config = fields.Dict(missing=dict)
class BioImageIoManifest(PyBioSchema):
    """Manifest schema: a format version, a config dict and one entry list per category."""

    # Must be one of the versions enumerated by raw_nodes.FormatVersion.
    format_version = fields.String(
        required=True,
        validate=validate.OneOf(raw_nodes.FormatVersion.__args__),
    )
    config = fields.Dict()

    # Per-category entry lists; each declares `list` as its `missing` factory.
    application = fields.List(fields.Dict, missing=list)
    collection = fields.List(fields.Dict, missing=list)
    model = fields.List(
        fields.Nested(BioImageIoManifestModelEntry),
        missing=list,
    )
    dataset = fields.List(fields.Dict, missing=list)
    notebook = fields.List(
        fields.Nested(BioImageIoManifestNotebookEntry),
        missing=list,
    )
class BioImageIoManifestNotebookEntry(PyBioSchema):
    """Schema of a single notebook entry inside a manifest."""

    # Mandatory identification and description strings.
    id = fields.String(required=True)
    name = fields.String(required=True)
    documentation = fields.String(required=True)
    description = fields.String(required=True)

    # Attribution and cover images.
    cite = fields.List(
        fields.Nested(CiteEntry),
        missing=list,
    )
    authors = fields.List(
        fields.String,
        required=True,
    )
    covers = fields.List(
        fields.URI,
        missing=list,
    )

    # Badges, tags, the mandatory source URI and plain-string links.
    badges = fields.List(
        fields.Nested(Badge),
        missing=list,
    )
    tags = fields.List(
        fields.String,
        missing=list,
    )
    source = fields.URI(required=True)
    links = fields.List(
        fields.String,
        missing=list,
    )  # todo: make List[URI]?
Exemple #4
0
class BaseSpec(PyBioSchema):
    """Fields shared by all specs: metadata, source reference, kwargs and test data.

    NOTE(review): `fields` is a project module; the comments below assume its
    field classes follow marshmallow's `required`/`missing` semantics.
    """

    # Mandatory descriptive metadata.
    name = fields.String(required=True)
    format_version = fields.String(required=True)
    description = fields.String(required=True)
    cite = fields.Nested(CiteEntry(), many=True, required=True)
    # `required=True` is placed on the list field itself. On the inner String
    # it has no effect (list elements are never "missing"), which left
    # `authors` optional, unlike `tags` and the other metadata fields here.
    authors = fields.List(fields.String, required=True)
    documentation = fields.Path(required=True)
    tags = fields.List(fields.String, required=True)
    license = fields.String(required=True)

    # Implementation reference; only `framework` has a `missing` value (None).
    language = fields.String(required=True)
    framework = fields.String(missing=None)
    source = fields.String(required=True)
    # Names of required kwargs, and a dict of optional kwargs keyed by string.
    required_kwargs = fields.List(fields.String, missing=list)
    optional_kwargs = fields.Dict(fields.String, missing=dict)

    # Test data paths and cover paths.
    test_input = fields.Path(missing=None)
    test_output = fields.Path(missing=None)
    covers = fields.List(fields.Path, missing=list)
class OutputTensor(Tensor):
    """Output tensor schema: extends `Tensor` with shape, halo and postprocessing."""

    shape = fields.OutputShape(required=True)
    halo = fields.List(
        fields.Integer,
        bioimageio_description=(
            "The halo to crop from the output tensor (for example to crop away boundary effects or "
            "for tiling). The halo should be cropped from both sides, i.e. `shape_after_crop = shape - 2 * halo`. The "
            "`halo` is not cropped by the bioimage.io model, but is left to be cropped by the consumer software. Use "
            "`shape:offset` if the model output itself is cropped and input and output shapes not fixed."
        ),
        missing=None,
    )
    postprocessing = fields.List(
        fields.Nested(Postprocessing),
        bioimageio_description=(
            "Description of how this output should be postprocessed."
        ),
        missing=list,
    )
    processing_name = "postprocessing"

    @validates_schema
    def matching_halo_length(self, data, **kwargs):
        """Check that a given halo has as many entries as the shape.

        Raises:
            PyBioValidationException: if halo and shape differ in length.
            NotImplementedError: if a halo is given and the shape is neither a
                list nor a `raw_nodes.ImplicitOutputShape`.
        """
        shape, halo = data["shape"], data["halo"]

        # Nothing to compare when no halo was specified.
        if halo is None:
            return

        supported_shapes = (list, raw_nodes.ImplicitOutputShape)
        if not isinstance(shape, supported_shapes):
            raise NotImplementedError(type(shape))

        if len(halo) != len(shape):
            raise PyBioValidationException(
                f"halo {halo} has to have same length as shape {shape}!")

    @post_load
    def make_object(self, data, **kwargs):
        """Replace a missing halo by zeros (one per shape entry), then defer to the parent."""
        shape, halo = data["shape"], data["halo"]
        if halo is None:
            data["halo"] = len(shape) * [0]

        return super().make_object(data, **kwargs)
class InputTensor(Tensor):
    """Input tensor schema: extends `Tensor` with shape and preprocessing."""

    shape = fields.InputShape(
        bioimageio_description="Specification of tensor shape.",
        required=True,
    )
    preprocessing = fields.List(
        fields.Nested(Preprocessing),
        bioimageio_description=(
            "Description of how this input should be preprocessed."
        ),
        missing=list,
    )
    processing_name = "preprocessing"

    @validates_schema
    def zero_batch_step_and_one_batch_size(self, data, **kwargs):
        """Enforce step 0 and size 1 along the batch axis `b`, if `axes` contains one.

        Raises:
            PyBioValidationException: for an unknown shape type, a non-zero
                batch step, or a batch size other than 1.
        """
        axes = data["axes"]
        shape = data["shape"]

        batch_pos = axes.find("b")
        if batch_pos == -1:
            # No batch axis: nothing to validate.
            return

        if isinstance(shape, raw_nodes.ImplicitInputShape):
            # Implicit shapes carry their own step; sizes are checked on `min`.
            step, sizes = shape.step, shape.min
        elif isinstance(shape, list):
            # Explicit shapes are fixed, i.e. step is zero along every axis.
            step, sizes = [0] * len(shape), shape
        else:
            raise PyBioValidationException(f"Unknown shape type {type(shape)}")

        if step[batch_pos] != 0:
            raise PyBioValidationException(
                "Input shape step has to be zero in the batch dimension (the batch dimension can always be "
                "increased, but `step` should specify how to increase the minimal shape to find the largest "
                "single batch shape)")

        if sizes[batch_pos] != 1:
            raise PyBioValidationException(
                "Input shape has to be 1 in the batch dimension b.")
class Model(Spec):
    """Schema of a model description: `Spec` plus source, weights, tensors and test data.

    The `bioimageio_description` strings are runtime values attached to the
    class and its fields.
    NOTE(review): `fields` is a project module; comments about `missing`
    assume marshmallow semantics (a callable is invoked per load) -- confirm.
    """

    bioimageio_description = f"""# BioImage.IO Model Description File Specification {raw_nodes.FormatVersion.__args__[-1]}
A model entry in the bioimage.io model zoo is defined by a configuration file model.yaml.
The configuration file must contain the following fields; optional fields are indicated by _optional_.
_optional*_ with an asterisk indicates the field is optional depending on the value in another field.
"""
    name = fields.String(
        # validate=validate.Length(max=36),  # todo: enforce in future version
        required=True,
        bioimageio_description=
        "Name of this model. It should be human-readable and only contain letters, numbers, "
        "`_`, `-` or spaces and not be longer than 36 characters.",
    )

    packaged_by = fields.List(
        fields.String,
        missing=list,
        bioimageio_description=
        f"The persons that have packaged and uploaded this model. Only needs to be specified if "
        f"different from `authors` in root or any {WeightsEntry.__name__}.",
    )

    parent = fields.Nested(
        ModelParent,
        missing=None,
        bioimageio_description=
        "Parent model from which the trained weights of this model have been derived, e.g. by "
        "finetuning the weights of this model on a different dataset. For format changes of the same trained model "
        "checkpoint, see `weights`.",
    )

    source = fields.ImportableSource(
        missing=None,
        bioimageio_maybe_required=True,
        bioimageio_description=
        "Language and framework specific implementation. As some weights contain the model "
        "architecture, the source is optional depending on the present weight formats. `source` can either point to a "
        "local implementation: `<relative path to file>:<identifier of implementation within the source file>` or the "
        "implementation in an available dependency: `<root-dependency>.<sub-dependency>.<identifier>`.\nFor example: "
        "`./my_function:MyImplementation` or `core_library.some_module.some_function`.",
    )
    sha256 = fields.String(
        validate=validate.Length(equal=64),
        missing=None,
        bioimageio_description="SHA256 checksum of the model source code file."
        + _common_sha256_hint +
        " This field is only required if the field source is present.",
    )
    kwargs = fields.Kwargs(
        bioimageio_description=
        "Keyword arguments for the implementation specified by `source`. "
        "This field is only required if the field `source` is present.")

    # Mapping of weights format name (validated key) to a nested WeightsEntry.
    weights = fields.Dict(
        fields.String(
            validate=validate.OneOf(raw_nodes.WeightsFormat.__args__),
            required=True,
            bioimageio_description=
            f"Format of this set of weights. Weight formats can define additional (optional or "
            f"required) fields. See [supported_formats_and_operations.md#Weight Format]"
            f"(https://github.com/bioimage-io/configuration/blob/master/supported_formats_and_operations.md#weight_format). "
            f"One of: {', '.join(raw_nodes.WeightsFormat.__args__)}",
        ),
        fields.Nested(WeightsEntry),
        required=True,
        bioimageio_description=
        "The weights for this model. Weights can be given for different formats, but should "
        "otherwise be equivalent. The available weight formats determine which consumers can use this model.",
    )

    inputs = fields.Nested(
        InputTensor,
        many=True,
        bioimageio_description=
        "Describes the input tensors expected by this model.")
    outputs = fields.Nested(
        OutputTensor,
        many=True,
        bioimageio_description="Describes the output tensors from this model.")

    test_inputs = fields.List(
        fields.URI,
        required=True,
        bioimageio_description=
        "List of URIs to test inputs as described in inputs for a single test case. "
        "Supported file formats/extensions: '.npy'",
    )
    test_outputs = fields.List(
        fields.URI,
        required=True,
        bioimageio_description="Analog to to test_inputs.")

    # `missing=list` (a factory) instead of `missing=[]`: a literal list would be
    # one shared mutable object handed to every load that omits the field.
    sample_inputs = fields.List(
        fields.URI,
        missing=list,
        bioimageio_description=
        "List of URIs to sample inputs to illustrate possible inputs for the model, for example "
        "stored as png or tif images.",
    )
    sample_outputs = fields.List(
        fields.URI,
        missing=list,
        bioimageio_description=
        "List of URIs to sample outputs corresponding to the `sample_inputs`.",
    )

    config = fields.Dict(
        missing=dict,
        bioimageio_description="""
A custom configuration field that can contain any other keys which are not defined above. It can be very specifc to a framework or specific tool. To avoid conflicted definitions, it is recommended to wrap configuration into a sub-field named with the specific framework or tool name.

For example:
```yaml
config:
  # custom config for DeepImageJ, see https://github.com/bioimage-io/configuration/issues/23
  deepimagej:
    model_keys:
      # In principle the tag "SERVING" is used in almost every tf model
      model_tag: tf.saved_model.tag_constants.SERVING
      # Signature definition to call the model. Again "SERVING" is the most general
      signature_definition: tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    test_information:
      input_size: [2048x2048] # Size of the input images
      output_size: [1264x1264 ]# Size of all the outputs
      device: cpu # Device used. In principle either cpu or GPU
      memory_peak: 257.7 Mb # Maximum memory consumed by the model in the device
      runtime: 78.8s # Time it took to run the model
      pixel_size: [9.658E-4µmx9.658E-4µm] # Size of the pixels of the input
```
""",
    )

    @validates_schema
    def language_and_framework_match(self, data, **kwargs):
        """Check that (language, framework) is a known pair and has source if the pair needs it.

        Raises:
            PyBioValidationException: for an unknown combination, or when the
                combination requires source code and `source` is None/absent.
        """
        # NOTE(review): `language` and `framework` are declared on `Spec` with
        # missing=None, so a model without either presumably arrives here as
        # (None, None) and is rejected as an invalid combination -- confirm
        # whether source-less models are meant to pass.
        field_names = ("language", "framework")
        valid_combinations = {
            ("python", "scikit-learn"): {
                "requires_source": False
            },
            ("python", "pytorch"): {
                "requires_source": True
            },
            ("python", "tensorflow"): {
                "requires_source": False
            },
            ("java", "tensorflow"): {
                "requires_source": False
            },
        }
        combination = tuple(data[name] for name in field_names)
        if combination not in valid_combinations:
            raise PyBioValidationException(
                f"invalid combination of {dict(zip(field_names, combination))}"
            )

        if valid_combinations[combination]["requires_source"] and data.get(
                "source") is None:
            raise PyBioValidationException(
                f"{dict(zip(field_names, combination))} require source code to be specified."
            )

    @validates_schema
    def source_specified_if_required(self, data, **kwargs):
        """Reject models without `source` whose weight formats need source code.

        Raises:
            PyBioValidationException: listing the offending weight formats.
        """
        if data["source"] is not None:
            return

        # Whether each known weights format needs the source code to be usable.
        weight_format_requires_source = {
            "pickle": True,
            "pytorch_state_dict": True,
            "pytorch_script": False,
            "keras_hdf5": False,
            "tensorflow_js": False,
            "tensorflow_saved_model_bundle": False,
            "onnx": False,
        }
        require_source = {
            wf
            for wf in data["weights"] if weight_format_requires_source[wf]
        }
        if require_source:
            raise PyBioValidationException(
                f"These specified weight formats require source code to be specified: {require_source}"
            )

    @validates_schema
    def validate_reference_tensor_names(self, data, **kwargs):
        """Check that every postprocessing `reference_tensor` names an input tensor.

        Raises:
            PyBioValidationException: if a referenced tensor name is not among
                the names of `inputs`.
        """
        valid_input_tensor_references = [ipt.name for ipt in data["inputs"]]
        for out in data["outputs"]:
            for postpr in out.postprocessing:
                ref_tensor = postpr.kwargs.get("reference_tensor", None)
                if ref_tensor is not None and ref_tensor not in valid_input_tensor_references:
                    raise PyBioValidationException(
                        f"{ref_tensor} not found in inputs")

    @validates_schema
    def weights_entries_match_weights_formats(self, data, **kwargs):
        """Reject format-specific version entries on weights of another format.

        Raises:
            PyBioValidationException: if `tensorflow_version` is set for a
                format whose name lacks "tensorflow", or `opset_version` is
                set for a format other than "onnx".
        """
        weights: typing.Dict[str, WeightsEntry] = data["weights"]
        for weights_format, weights_entry in weights.items():
            if "tensorflow" not in weights_format and weights_entry.tensorflow_version is not None:
                raise PyBioValidationException(
                    f"invalid 'tensorflow_version' entry for weights format {weights_format}"
                )

            if weights_format != "onnx" and weights_entry.opset_version is not None:
                raise PyBioValidationException(
                    f"invalid 'opset_version' entry for weights format {weights_format} (only valid for onnx)"
                )
class Spec(PyBioSchema):
    """Common schema fields of a specification: metadata, attachments and environment.

    The `bioimageio_description` strings are runtime values attached to the
    fields; they contain markdown.
    """

    format_version = fields.String(
        validate=validate.OneOf(raw_nodes.FormatVersion.__args__),
        required=True,
        bioimageio_description_order=0,
        bioimageio_description=
        f"""Version of the BioImage.IO Model Description File Specification used.
This is mandatory, and important for the consumer software to verify before parsing the fields.
The recommended behavior for the implementation is to keep backward compatibility and throw an error if the model yaml
is in an unsupported format version. The current format version described here is
{raw_nodes.FormatVersion.__args__[-1]}""",
    )
    name = fields.String(required=True)
    description = fields.String(
        required=True,
        bioimageio_description="A string containing a brief description.")

    authors = fields.List(
        fields.String,
        required=True,
        bioimageio_description="""A list of author strings.
A string can be separated by `;` in order to identify multiple handles per author.
The authors are the creators of the specifications and the primary points of contact.""",
    )
    cite = fields.Nested(
        CiteEntry,
        many=True,
        required=True,
        bioimageio_description="""A citation entry or list of citation entries.
Each entry contains a mandatory `text` field and either one or both of `doi` and `url`.
E.g. the citation for the model architecture and/or the training data used.""",
    )

    # Only http/https URLs are accepted for the repository.
    git_repo = fields.String(
        validate=validate.URL(schemes=["http", "https"]),
        missing=None,
        bioimageio_description=
        """A url to the git repository, e.g. to Github or Gitlab.
If the model is contained in a subfolder of a git repository, then a url to the exact folder
(which contains the configuration yaml file) should be used.""",
    )
    tags = fields.List(fields.String,
                       required=True,
                       bioimageio_description="A list of tags.")
    license = fields.String(
        required=True,
        bioimageio_description=
        "A string to a common license name (e.g. `MIT`, `APLv2`) or a relative path to the "
        "license file.",
    )

    documentation = fields.URI(
        required=True,
        bioimageio_description=
        "Relative path to file with additional documentation in markdown.")
    covers = fields.List(
        fields.URI,
        missing=list,
        bioimageio_description=
        "A list of cover images provided by either a relative path to the model folder, or a "
        "hyperlink starting with 'https'.Please use an image smaller than 500KB and an aspect ratio width to height "
        "of 2:1. The supported image formats are: 'jpg', 'png', 'gif'.",  # todo: validate image format
    )
    # String keys mapped to either a single URI or a list of URIs.
    attachments = fields.Dict(
        fields.String,
        fields.Union([fields.URI(), fields.List(fields.URI)]),
        missing=dict,
        bioimageio_maybe_required=True,
        bioimageio_description=
        """Dictionary of text keys and URI (or a list of URI) values to additional, relevant
files. E.g. we can place a list of URIs under the `files` to list images and other files that are necessary for the
documentation or for the model to run, these files will be included when generating the model package.""",
    )

    run_mode = fields.Nested(
        RunMode,
        missing=None,
        bioimageio_description=
        "Custom run mode for this model: for more complex prediction procedures like test time "
        "data augmentation that currently cannot be expressed in the specification. The different run modes should be "
        "listed in [supported_formats_and_operations.md#Run Modes]"
        "(https://github.com/bioimage-io/configuration/blob/master/supported_formats_and_operations.md#run-modes).",
    )
    config = fields.Dict(missing=dict)

    language = fields.String(
        validate=validate.OneOf(raw_nodes.Language.__args__),
        missing=None,
        bioimageio_maybe_required=True,
        bioimageio_description=
        f"Programming language of the source code. One of: "
        f"{', '.join(raw_nodes.Language.__args__)}. This field is only required if the field `source` is present.",
    )
    framework = fields.String(
        validate=validate.OneOf(raw_nodes.Framework.__args__),
        missing=None,
        bioimageio_description=
        f"The deep learning framework of the source code. One of: "
        f"{', '.join(raw_nodes.Framework.__args__)}. This field is only required if the field `source` is present.",
    )
    dependencies = fields.Dependencies(
        missing=None,
        bioimageio_description=
        "Dependency manager and dependency file, specified as `<dependency manager>:<relative "
        "path to file>`. For example: 'conda:./environment.yaml', 'maven:./pom.xml', or 'pip:./requirements.txt'",
    )
    # The link target must be a plain URL: a leading `#` would turn it into an
    # in-page anchor and break the link in rendered markdown.
    timestamp = fields.DateTime(
        required=True,
        bioimageio_description=
        "Timestamp of the initial creation of this model in [ISO 8601]"
        "(https://en.wikipedia.org/wiki/ISO_8601) format.",
    )
Exemple #9
0
class TransformationSpec(BaseSpec):
    """Transformation spec: `BaseSpec` plus mandatory dependencies, inputs and outputs."""

    dependencies = fields.Dependencies(required=True)

    # Nested array schemas (declared without many=True).
    inputs = fields.Nested(
        InputArray,
        required=True,
    )
    outputs = fields.Nested(
        OutputArray,
        required=True,
    )
Exemple #10
0
class OutputArray(Array):
    """Output array schema: a mandatory shape and a halo whose `missing` value is None."""

    # Either an explicit shape or a nested OutputShape schema.
    shape = fields.Union(
        [
            fields.ExplicitShape(),
            fields.Nested(OutputShape),
        ],
        required=True,
    )
    halo = fields.List(
        fields.Integer,
        missing=None,
    )
Exemple #11
0
class ReaderSpec(BaseSpec):
    """Reader spec: `BaseSpec` plus dependencies (missing=None) and mandatory outputs."""

    dependencies = fields.Dependencies(missing=None)
    outputs = fields.Nested(
        OutputArray,
        required=True,
    )
Exemple #12
0
class Setup(PyBioSchema):
    """Setup schema: samplers, transformation lists and an optimizer."""

    # Mandatory list of nested Sampler schemas.
    samplers = fields.List(
        fields.Nested(Sampler, required=True),
        required=True,
    )

    # Transformation lists; each declares `list` as its `missing` factory.
    preprocess = fields.Nested(Transformation, missing=list, many=True)
    postprocess = fields.Nested(Transformation, missing=list, many=True)
    losses = fields.Nested(Transformation, missing=list, many=True)

    optimizer = fields.Nested(
        Optimizer,
        missing=None,
    )
Exemple #13
0
class Sampler(SpecWithKwargs):
    """Sampler schema: a SpecURI field built from `SamplerSpec` and a mandatory reader list."""

    spec = fields.SpecURI(SamplerSpec)
    readers = fields.List(
        fields.Nested(Reader, required=True),
        required=True,
    )
Exemple #14
0
class SamplerSpec(BaseSpec):
    """Sampler spec: `BaseSpec` plus dependencies and outputs, both with missing=None."""

    dependencies = fields.Dependencies(missing=None)
    outputs = fields.Nested(
        OutputArray,
        missing=None,
    )
Exemple #15
0
class Reader(SpecWithKwargs):
    """Reader schema: a SpecURI field built from `ReaderSpec` and a list of transformations."""

    spec = fields.SpecURI(ReaderSpec)
    transformations = fields.List(
        fields.Nested(Transformation),
        missing=list,
    )
Exemple #16
0
class InputArray(Array):
    """Input array schema: adds a mandatory shape to `Array`."""

    # Either an explicit shape or a nested InputShape schema.
    shape = fields.Union(
        [
            fields.ExplicitShape(),
            fields.Nested(InputShape),
        ],
        required=True,
    )
Exemple #17
0
class Prediction(PyBioSchema):
    """Prediction schema: weights, dependencies and pre-/postprocessing transformations."""

    # Both declare None as their `missing` value.
    weights = fields.Nested(
        Weights,
        missing=None,
    )
    dependencies = fields.Dependencies(missing=None)

    # Transformation lists; each declares `list` as its `missing` factory.
    preprocess = fields.Nested(Transformation, missing=list, many=True)
    postprocess = fields.Nested(Transformation, missing=list, many=True)