Example #1
0
def test_model_loading_on_examples(example):
    """Smoke-test that a parsed dataloader.yaml exposes the expected fields.

    Loads ``example/models/<example>/dataloader.yaml`` and reads each
    attribute in turn, so a missing field shows up as an error raised by the
    attribute access. No field values are asserted.
    """
    yaml_file = "example/models/{0}/dataloader.yaml".format(example)
    descr = DataLoaderDescription.load(yaml_file)

    # NOTE: the result of this comparison is discarded (it is not an assert);
    # the line only exercises reading `type`.
    descr.type == "Dataset"

    # top-level fields
    for field in ("defined_as", "args"):
        getattr(descr, field)

    # fields of the first declared argument
    first_arg = six.next(six.itervalues(descr.args))
    for field in ("doc", "type", "optional"):
        getattr(first_arg, field)

    # general information block
    descr.info
    for field in ("authors", "name", "version", "tags", "doc"):
        getattr(descr.info, field)

    # output schema and the first declared input
    descr.output_schema
    descr.output_schema.inputs
    first_input = six.next(six.itervalues(descr.output_schema.inputs))
    for field in ("shape", "special_type", "associated_metadata"):
        getattr(first_input, field)

    for field in ("targets", "metadata"):
        getattr(descr.output_schema, field)
Example #2
0
def get_dataloader_factory(dataloader, source="kipoi"):
    """Pull a dataloader from a source and return its class with the yaml description attached.

    # Arguments
        dataloader (str): dataloader name, passed to `source.pull_dataloader`
        source (str): source name, resolved through `kipoi.config.get_source`

    # Returns
        The dataloader class (functions are first wrapped via `from_fn`)
        carrying the fields of the parsed description plus `_yaml_path`,
        `source`, `source_dir`, `example_kwargs`, `init_example` and
        `print_args`.

    # Raises
        ValueError: if the declared type is unsupported, the implemented
            arguments differ from the yaml ones, or the class does not
            inherit from the declared dataloader type.
    """
    # resolve the source and fetch the dataloader's yaml
    src = kipoi.config.get_source(source)
    yaml_path = src.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # parse the description and import the implementation while temporarily
    # working inside the dataloader directory
    with cd(dataloader_dir):
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = dl.defined_as.split("::")
        module = load_module(file_path)
        CustomDataLoader = getattr(module, obj_name)

    # the declared type has to be one we know about
    if dl.type not in AVAILABLE_DATALOADERS:
        raise ValueError("dataloader type: {0} is not in supported dataloaders:{1}".
                         format(dl.type, list(AVAILABLE_DATALOADERS.keys())))

    # implemented arguments have to agree with the yaml arguments
    implemented_args = getargs(CustomDataLoader)
    declared_args = set(dl.args.keys())
    if implemented_args != declared_args:
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(set(implemented_args)) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(declared_args))

    # the implementation has to match the declared dataloader type
    base_cls = AVAILABLE_DATALOADERS[dl.type]
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        # function-style dataloaders get wrapped into a class
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = base_cls.from_fn(CustomDataLoader)
    elif not issubclass(CustomDataLoader, base_cls):
        raise ValueError("DataLoader does't inherit from the specified dataloader: {0}".
                         format(base_cls.__name__))

    loaded_from = os.path.normpath(os.path.join(dataloader_dir, dl.defined_as))
    logger.info('successfully loaded the dataloader from {}'.format(loaded_from))

    # copy the description fields onto the class
    # TODO - make this more automatic / DRY
    for field in ("type", "defined_as", "args", "info",
                  "output_schema", "dependencies", "postprocessing"):
        setattr(CustomDataLoader, field, getattr(dl, field))

    # bookkeeping about where the dataloader came from
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source = src
    CustomDataLoader.source_dir = dataloader_dir  # TODO - rename?

    # convenience helpers built from the argument description
    CustomDataLoader.example_kwargs = example_kwargs(CustomDataLoader.args)

    def init_example(cls):
        return cls(**cls.example_kwargs)

    CustomDataLoader.init_example = classmethod(init_example)
    CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader
Example #3
0
    def _get_component_descr(self, component):
        """Render the yaml template for `component` and parse it into a description.

        The template is rendered with the row returned by
        `self.get_model_row(component)`. Depending on `self.which` the text
        is parsed as a ModelDescription ('model') or a DataLoaderDescription
        ('dataloader'); any other value raises ValueError.
        """
        from kipoi.specs import ModelDescription, DataLoaderDescription

        # fill in the template
        row = self.get_model_row(component)
        yaml_text = self.template.render(**row)

        if self.which == 'model':
            return ModelDescription.from_string(yaml_text)
        if self.which == 'dataloader':
            return DataLoaderDescription.from_string(yaml_text)
        raise ValueError("Unknown component {}".format(self.which))
Example #4
0
    def wrap(cls):
        """Build a DataLoaderDescription from the docstring of `cls` and attach it.

        The class docstring is dedented and parsed as YAML. Top-level
        `defined_as` and `type` keys are added when the docstring does not
        declare them, the entries of `override` are applied on top, and the
        `optional` flag of every described argument is set according to
        whether the implemented argument has a default value.

        # Arguments
            cls: dataloader class inheriting from one of AVAILABLE_DATALOADERS

        # Returns
            The result of `cls._add_description_factory(dl_descr)`.

        # Raises
            ValueError: if `cls` is a plain function, does not inherit from
                an available dataloader, has no docstring, or if the
                described arguments differ from the implemented ones.
        """
        if inspect.isfunction(cls):
            raise ValueError(
                "Function-based dataloader are currently not supported with kipoi_dataloader decorator"
            )

        # figure out the right dataloader type
        dl_type_inferred = infer_parent_class(cls, AVAILABLE_DATALOADERS)
        if dl_type_inferred is None:
            raise ValueError(
                "Dataloader needs to inherit from one of the available dataloaders {}"
                .format(list(AVAILABLE_DATALOADERS)))

        doc = cls.__doc__
        if doc is None:
            # textwrap.dedent(None) would fail with an opaque TypeError
            raise ValueError(
                "Dataloader {} has no docstring to parse the description from"
                .format(cls.__name__))
        doc = textwrap.dedent(doc)  # de-indent

        # After dedent, top-level YAML keys start at column 0 of *some* line,
        # not necessarily the first one, so search every line. Matching only
        # at the start of the string would prepend a duplicate key.
        if not re.search(r"^defined_as: ", doc, flags=re.MULTILINE):
            doc = "defined_as: {}\n".format(cls.__name__) + doc
        if not re.search(r"^type: ", doc, flags=re.MULTILINE):
            doc = "type: {}\n".format(dl_type_inferred) + doc

        # parse the yaml
        yaml_dict = related.from_yaml(doc)
        dl_descr = DataLoaderDescription.from_config(yaml_dict)

        # override parameters
        for k, v in six.iteritems(override):
            rsetattr(dl_descr, k, v)

        # setup optional parameters
        arg_names, default_values = _get_arg_name_values(cls)

        if set(dl_descr.args) != set(arg_names):
            raise ValueError(
                "Described args don't exactly match the implemented arguments. "
                "docstring: {}, actual: {}".format(list(dl_descr.args),
                                                   list(arg_names)))

        # Defaults belong to the trailing arguments of the *signature*.
        # Decide optionality by name so that a docstring listing the
        # arguments in a different order is still labelled correctly.
        ordered_names = list(arg_names)
        n_required = max(len(ordered_names) - len(default_values), 0)
        optional_names = set(ordered_names[n_required:])

        # properly set optional / non-optional argument values
        for arg in dl_descr.args:
            optional = arg in optional_names
            if dl_descr.args[arg].optional and not optional:
                logger.warning(
                    "Parameter {} was specified as optional. However, there "
                    "are no defaults for it. Specifying it as not optinal".
                    format(arg))
            dl_descr.args[arg].optional = optional

        dl_descr.info.name = cls.__name__

        # enrich the class with dataloader description
        return cls._add_description_factory(dl_descr)
Example #5
0
def load_component_descr(component_dir, which="model"):
    """Locate and parse the description yaml of a component directory.

    # Arguments
        component_dir: directory of the component; made absolute before lookup
        which: 'model' or 'dataloader'

    # Returns
        ModelDescription or DataLoaderDescription, depending on `which`

    # Raises
        ValueError: if `which` is neither 'model' nor 'dataloader'
    """
    from kipoi.specs import ModelDescription, DataLoaderDescription

    abs_dir = os.path.abspath(component_dir)
    descr_path = get_component_file(abs_dir, which, raise_err=True)

    # parse from within the directory holding the yaml file
    with cd(os.path.dirname(descr_path)):
        if which == "model":
            return ModelDescription.load(descr_path)
        if which == "dataloader":
            return DataLoaderDescription.load(descr_path)
        raise ValueError("which needs to be from {'model', 'dataloader'}")
Example #6
0
def get_dataloader_factory(dataloader):
    """Load the dataloader class described by ``./model/dataloader.yaml``.

    The yaml location is hard-coded; the `dataloader` argument is accepted
    but not used by this function.

    # Returns
        The dataloader class (functions are first wrapped via `from_fn`)
        carrying the fields of the parsed description plus `_yaml_path`
        and `source_dir`.

    # Raises
        ValueError: if the declared type is unsupported, the implemented
            arguments differ from the yaml ones, or the class does not
            inherit from the declared dataloader type.
    """
    # fixed location of the dataloader description
    yaml_path = './model/dataloader.yaml'
    dataloader_dir = './model/'

    # parse the description and import the implementation while temporarily
    # working inside the dataloader directory
    with cd(dataloader_dir):
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = dl.defined_as.split("::")
        module = load_module(file_path)
        CustomDataLoader = getattr(module, obj_name)

    # the declared type has to be one we know about
    if dl.type not in AVAILABLE_DATALOADERS:
        supported = list(AVAILABLE_DATALOADERS.keys())
        raise ValueError(
            "dataloader type: {0} is not in supported dataloaders:{1}".format(
                dl.type, supported))

    # implemented arguments have to agree with the yaml arguments
    implemented_args = getargs(CustomDataLoader)
    declared_args = set(dl.args.keys())
    if implemented_args != declared_args:
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(
            set(implemented_args)) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(declared_args))

    # the implementation has to match the declared dataloader type
    base_cls = AVAILABLE_DATALOADERS[dl.type]
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        # function-style dataloaders get wrapped into a class
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = base_cls.from_fn(CustomDataLoader)
    elif not issubclass(CustomDataLoader, base_cls):
        raise ValueError(
            "DataLoader does't inherit from the specified dataloader: {0}".
            format(base_cls.__name__))

    # copy the description fields onto the class
    for field in ("type", "defined_as", "args", "info",
                  "output_schema", "dependencies", "postprocessing"):
        setattr(CustomDataLoader, field, getattr(dl, field))
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source_dir = dataloader_dir
    # NOTE: print_args is not attached here (that assignment was commented out)

    return CustomDataLoader
def test_ModelDescription():
    """Check ModelInfoExtractor on the yaml fixtures, with and without rc support.

    For each setting of `rc_support` the extractor is built twice: once with
    the regular dataloader yaml and once with `dataloader_yaml_noshapes`
    (presumably the same schema without shape definitions). Both runs must
    yield the same mutators, metadata names and array transformations.
    """

    def _build_extractor(dataloader_text, seq_string_shape, ssrs):
        # a fresh model description is parsed for every extractor
        model = ModelDescription.from_config(
            from_yaml(model_yaml % (seq_string_shape, ssrs)))
        dataloader = DataLoaderDescription.from_config(
            from_yaml(dataloader_text))
        return ModelInfoExtractor(model, dataloader)

    def _check_extractor(mi, rc_support):
        assert mi.use_seq_only_rc == rc_support
        # mutators per sequence input
        assert all([
            isinstance(mi.seq_input_mutator[key], OneHotSequenceMutator)
            for key in ["seq_a", "seq_c"]
        ])
        assert all([
            isinstance(mi.seq_input_mutator[key], DNAStringSequenceMutator)
            for key in ["seq_b"]
        ])
        # associated metadata fields
        assert all([
            mi.seq_input_metadata[key] == "ranges" for key in ["seq_a", "seq_b"]
        ])
        assert all(
            [mi.seq_input_metadata[key] == "ranges_b" for key in ["seq_c"]])
        # array transformations
        assert all([
            isinstance(mi.seq_input_array_trafo[key], ReshapeDna)
            for key in ["seq_a", "seq_c"]
        ])
        assert all([
            isinstance(mi.seq_input_array_trafo[key], ReshapeDnaString)
            for key in ["seq_b"]
        ])

    for rc_support in [True, False]:
        seq_string_shape = ""
        ssrs = supports_simple_rc_str if rc_support else ""

        # dataloader schema rendered from the regular template
        _check_extractor(
            _build_extractor(dataloader_yaml % seq_string_shape,
                             seq_string_shape, ssrs),
            rc_support)

        # the extractor has to behave the same with the "noshapes" schema
        _check_extractor(
            _build_extractor(dataloader_yaml_noshapes,
                             seq_string_shape, ssrs),
            rc_support)
Example #8
0
def get_dataloader_factory(dataloader, source="kipoi"):
    """Load a dataloader class and attach its yaml description to it

    # Arguments
        dataloader (str): dataloader name
        source (str): source name

    # Returns
    - Class inheriting from one of the available kipoi dataloaders
           (like `kipoi.data.Dataset`), decorated with additional attributes.
           Function-based dataloaders are first converted with `from_fn`.

    # Raises
    - `ValueError` if the declared type is not supported, if the implemented
           arguments differ from the ones listed in dataloader.yaml, or if the
           class does not inherit from the declared dataloader type.

    # Methods
    - __batch_iter(batch_size, num_workers, **kwargs)__
         - Arguments
             - **batch_size**: batch size
             - **num_workers**: Number of workers to use in parallel.
             - ****kwargs**: Other kwargs specific to each dataloader
         - Yields
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - __batch_train_iter(cycle=True, **kwargs)__
         - Arguments
             - **cycle**: if True, cycle indefinitely
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - tuple of ("inputs", "targets") from the usual dict returned by `batch_iter()`
    - __batch_predict_iter(**kwargs)__
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - "inputs" field from the usual dict returned by `batch_iter()`
    - __load_all(**kwargs)__ - load the whole dataset into memory
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Returns
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - **init_example()** - instantiate the dataloader with example kwargs
    - **print_args()** - print information about the required arguments

    # Appended attributes
    - **type** (str): dataloader type (class name)
    - **defined_as** (str): path and dataloader name, separated by `::`
    - **args**: dataloader argument descriptions, keyed by argument name
    - **info** (kipoi.specs.Info): general information about the dataloader
    - **output_schema** (kipoi.specs.DataloaderSchema): information about the
            input/output data modalities
    - **dependencies** (kipoi.specs.Dependencies): class specifying the dependencies.
          (implements `install` method for running the installation)
    - **postprocessing** (dict): dictionary of loaded plugin specifications
    - **source**: source object returned by `kipoi.config.get_source`
    - **source_dir** (str): local directory containing the dataloader.yaml
    - **example_kwargs** (dict): kwargs for running the provided example
    """
    # resolve the source and fetch the dataloader's yaml
    src = kipoi.config.get_source(source)
    yaml_path = src.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # parse the description and import the implementation while temporarily
    # working inside the dataloader directory
    with cd(dataloader_dir):
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = dl.defined_as.split("::")
        module = load_module(file_path)
        CustomDataLoader = getattr(module, obj_name)

    # the declared type has to be one we know about
    if dl.type not in AVAILABLE_DATALOADERS:
        raise ValueError("dataloader type: {0} is not in supported dataloaders:{1}".
                         format(dl.type, list(AVAILABLE_DATALOADERS.keys())))

    # implemented arguments have to agree with the yaml arguments
    implemented_args = getargs(CustomDataLoader)
    declared_args = set(dl.args.keys())
    if implemented_args != declared_args:
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(set(implemented_args)) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(declared_args))

    # the implementation has to match the declared dataloader type
    base_cls = AVAILABLE_DATALOADERS[dl.type]
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        # function-style dataloaders get wrapped into a class
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = base_cls.from_fn(CustomDataLoader)
    elif not issubclass(CustomDataLoader, base_cls):
        raise ValueError("DataLoader does't inherit from the specified dataloader: {0}".
                         format(base_cls.__name__))

    loaded_from = os.path.normpath(os.path.join(dataloader_dir, dl.defined_as))
    logger.info('successfully loaded the dataloader from {}'.format(loaded_from))

    # copy the description fields onto the class
    # TODO - make this more automatic / DRY
    for field in ("type", "defined_as", "args", "info",
                  "output_schema", "dependencies", "postprocessing"):
        setattr(CustomDataLoader, field, getattr(dl, field))

    # bookkeeping about where the dataloader came from
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source = src
    CustomDataLoader.source_dir = dataloader_dir  # TODO - rename?

    # convenience helpers built from the argument description
    CustomDataLoader.example_kwargs = example_kwargs(CustomDataLoader.args)

    def init_example(cls):
        return cls(**cls.example_kwargs)

    CustomDataLoader.init_example = classmethod(init_example)
    CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader