def test_model_loading_on_examples(example):
    """Test extractor
    """
    model_file = "example/models/{0}/dataloader.yaml".format(example)
    dl = DataLoaderDescription.load(model_file)

    # check that all the fields exist
    assert dl.type == "Dataset"

    dl.defined_as
    dl.args
    arg_elem = six.next(six.itervalues(dl.args))
    arg_elem.doc
    arg_elem.type
    arg_elem.optional

    dl.info
    dl.info.authors
    dl.info.name
    dl.info.version
    dl.info.tags
    dl.info.doc

    dl.output_schema
    dl.output_schema.inputs
    inp_elem = six.next(six.itervalues(dl.output_schema.inputs))
    inp_elem.shape
    inp_elem.special_type
    inp_elem.associated_metadata

    dl.output_schema.targets
    dl.output_schema.metadata
def get_dataloader_factory(dataloader, source="kipoi"):
    # pull the dataloader & get the dataloader directory
    source = kipoi.config.get_source(source)
    yaml_path = source.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # --------------------------------------------
    # Setup dataloader description
    with cd(dataloader_dir):  # move to the dataloader directory temporarily
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = tuple(dl.defined_as.split("::"))
        CustomDataLoader = getattr(load_module(file_path), obj_name)

    # check that dl.type is correct
    if dl.type not in AVAILABLE_DATALOADERS:
        raise ValueError("dataloader type: {0} is not in supported dataloaders: {1}".
                         format(dl.type, list(AVAILABLE_DATALOADERS.keys())))

    # check that the extractor arguments match the yaml arguments
    if not getargs(CustomDataLoader) == set(dl.args.keys()):
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(set(getargs(CustomDataLoader))) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(set(dl.args.keys())))

    # check that CustomDataLoader indeed inherits from the right DataLoader
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        # transform the functions into objects
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = AVAILABLE_DATALOADERS[dl.type].from_fn(CustomDataLoader)
    else:
        if not issubclass(CustomDataLoader, AVAILABLE_DATALOADERS[dl.type]):
            raise ValueError("DataLoader doesn't inherit from the specified dataloader: {0}".
                             format(AVAILABLE_DATALOADERS[dl.type].__name__))
    logger.info('successfully loaded the dataloader from {}'.
                format(os.path.normpath(os.path.join(dataloader_dir, dl.defined_as))))

    # Inherit the attributes from dl
    # TODO - make this more automatic / DRY
    #        write a method to load those things?
    CustomDataLoader.type = dl.type
    CustomDataLoader.defined_as = dl.defined_as
    CustomDataLoader.args = dl.args
    CustomDataLoader.info = dl.info
    CustomDataLoader.output_schema = dl.output_schema
    CustomDataLoader.dependencies = dl.dependencies
    CustomDataLoader.postprocessing = dl.postprocessing
    # keep it hidden?
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source = source
    # TODO - rename?
    CustomDataLoader.source_dir = dataloader_dir

    # Add init_example method
    CustomDataLoader.example_kwargs = example_kwargs(CustomDataLoader.args)

    def init_example(cls):
        return cls(**cls.example_kwargs)
    CustomDataLoader.init_example = classmethod(init_example)
    CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader
def _get_component_descr(self, component):
    from kipoi.specs import ModelDescription, DataLoaderDescription

    # render the template
    rendered_yaml = self.template.render(**self.get_model_row(component))
    if self.which == 'model':
        return ModelDescription.from_string(rendered_yaml)
    elif self.which == 'dataloader':
        return DataLoaderDescription.from_string(rendered_yaml)
    else:
        raise ValueError("Unknown component {}".format(self.which))
def wrap(cls):
    if inspect.isfunction(cls):
        raise ValueError(
            "Function-based dataloaders are currently not supported with the kipoi_dataloader decorator"
        )
    # figure out the right dataloader type
    dl_type_inferred = infer_parent_class(cls, AVAILABLE_DATALOADERS)
    if dl_type_inferred is None:
        raise ValueError(
            "Dataloader needs to inherit from one of the available dataloaders {}"
            .format(list(AVAILABLE_DATALOADERS)))
    # or not inherits_from(cls, Dataset)

    doc = cls.__doc__
    doc = textwrap.dedent(doc)  # de-indent

    if not re.match("^defined_as: ", doc):
        doc = "defined_as: {}\n".format(cls.__name__) + doc
    if not re.match("^type: ", doc):
        doc = "type: {}\n".format(dl_type_inferred) + doc

    # parse the yaml
    yaml_dict = related.from_yaml(doc)
    dl_descr = DataLoaderDescription.from_config(yaml_dict)

    # override parameters
    for k, v in six.iteritems(override):
        rsetattr(dl_descr, k, v)

    # setup optional parameters
    arg_names, default_values = _get_arg_name_values(cls)
    if set(dl_descr.args) != set(arg_names):
        raise ValueError(
            "Described args don't exactly match the implemented arguments. "
            "docstring: {}, actual: {}".format(list(dl_descr.args), list(arg_names)))

    # properly set optional / non-optional argument values
    for i, arg in enumerate(dl_descr.args):
        optional = i >= len(arg_names) - len(default_values)
        if dl_descr.args[arg].optional and not optional:
            logger.warning(
                "Parameter {} was specified as optional. However, there "
                "are no defaults for it. Specifying it as not optional.".
                format(arg))
        dl_descr.args[arg].optional = optional

    dl_descr.info.name = cls.__name__

    # enrich the class with the dataloader description
    return cls._add_description_factory(dl_descr)
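# ---------------------------------------------------------------------------
# Illustrative sketch (not taken from this module) of the class layout that
# wrap() expects: a Dataset subclass whose docstring is the dataloader YAML
# spec. The decorator name `kipoi_dataloader`, the class `MyTsvDataset` and
# every YAML value below are hypothetical placeholders; `defined_as` and
# `type` may be omitted because wrap() prepends them from the class name and
# the inferred parent dataloader type before parsing with related.from_yaml().
#
# @kipoi_dataloader(override={"info.version": "0.1"})
# class MyTsvDataset(Dataset):
#     """
#     args:
#         tsv_file:
#             doc: path to a tab-separated file with one sample per row
#         n_rows:
#             doc: optional limit on the number of rows to load
#     info:
#         doc: toy dataloader reading numeric features from a tsv file
#     output_schema:
#         inputs:
#             shape: (4,)
#             doc: four numeric features per sample
#         targets:
#             shape: (1,)
#             doc: scalar target value
#     """
#
#     def __init__(self, tsv_file, n_rows=None):
#         ...  # n_rows has a default, so wrap() marks it optional
#
#     def __len__(self):
#         ...
#
#     def __getitem__(self, idx):
#         ...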
def load_component_descr(component_dir, which="model"):
    """Return the parsed yaml file
    """
    from kipoi.specs import ModelDescription, DataLoaderDescription

    fname = get_component_file(os.path.abspath(component_dir), which, raise_err=True)

    with cd(os.path.dirname(fname)):
        if which == "model":
            return ModelDescription.load(fname)
        elif which == "dataloader":
            return DataLoaderDescription.load(fname)
        else:
            raise ValueError("which needs to be from {'model', 'dataloader'}")
def get_dataloader_factory(dataloader):
    # pull the dataloader & get the dataloader directory
    yaml_path = './model/dataloader.yaml'
    dataloader_dir = './model/'

    # --------------------------------------------
    # Setup dataloader description
    with cd(dataloader_dir):  # move to the dataloader directory temporarily
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = tuple(dl.defined_as.split("::"))
        CustomDataLoader = getattr(load_module(file_path), obj_name)

    # check that dl.type is correct
    if dl.type not in AVAILABLE_DATALOADERS:
        raise ValueError(
            "dataloader type: {0} is not in supported dataloaders: {1}".format(
                dl.type, list(AVAILABLE_DATALOADERS.keys())))

    # check that the extractor arguments match the yaml arguments
    if not getargs(CustomDataLoader) == set(dl.args.keys()):
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(
            set(getargs(CustomDataLoader))) +
            "the specification in the dataloader.yaml file:\n{0}".
            format(set(dl.args.keys())))

    # check that CustomDataLoader indeed inherits from the right DataLoader
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        # transform the functions into objects
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = AVAILABLE_DATALOADERS[dl.type].from_fn(
            CustomDataLoader)
    else:
        if not issubclass(CustomDataLoader, AVAILABLE_DATALOADERS[dl.type]):
            raise ValueError(
                "DataLoader doesn't inherit from the specified dataloader: {0}".
                format(AVAILABLE_DATALOADERS[dl.type].__name__))

    # Inherit the attributes from dl
    CustomDataLoader.type = dl.type
    CustomDataLoader.defined_as = dl.defined_as
    CustomDataLoader.args = dl.args
    CustomDataLoader.info = dl.info
    CustomDataLoader.output_schema = dl.output_schema
    CustomDataLoader.dependencies = dl.dependencies
    CustomDataLoader.postprocessing = dl.postprocessing
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source_dir = dataloader_dir
    # CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader
def test_ModelDescription():
    for rc_support in [True, False]:
        seq_string_shape = ""
        if rc_support:
            ssrs = supports_simple_rc_str
        else:
            ssrs = ""
        model = ModelDescription.from_config(
            from_yaml(model_yaml % (seq_string_shape, ssrs)))
        dataloader = DataLoaderDescription.from_config(
            from_yaml(dataloader_yaml % (seq_string_shape)))
        mi = ModelInfoExtractor(model, dataloader)
        assert mi.use_seq_only_rc == rc_support
        assert all([isinstance(mi.seq_input_mutator[sl], OneHotSequenceMutator)
                    for sl in ["seq_a", "seq_c"]])
        assert all([isinstance(mi.seq_input_mutator[sl], DNAStringSequenceMutator)
                    for sl in ["seq_b"]])
        assert all([mi.seq_input_metadata[sl] == "ranges"
                    for sl in ["seq_a", "seq_b"]])
        assert all([mi.seq_input_metadata[sl] == "ranges_b"
                    for sl in ["seq_c"]])
        assert all([isinstance(mi.seq_input_array_trafo[sl], ReshapeDna)
                    for sl in ["seq_a", "seq_c"]])
        assert all([isinstance(mi.seq_input_array_trafo[sl], ReshapeDnaString)
                    for sl in ["seq_b"]])

        # Test whether the model info extractor also works with missing shapes
        # in the dataloader schema definition.
        model = ModelDescription.from_config(
            from_yaml(model_yaml % (seq_string_shape, ssrs)))
        dataloader = DataLoaderDescription.from_config(
            from_yaml(dataloader_yaml_noshapes))
        mi = ModelInfoExtractor(model, dataloader)
        assert mi.use_seq_only_rc == rc_support
        assert all([isinstance(mi.seq_input_mutator[sl], OneHotSequenceMutator)
                    for sl in ["seq_a", "seq_c"]])
        assert all([isinstance(mi.seq_input_mutator[sl], DNAStringSequenceMutator)
                    for sl in ["seq_b"]])
        assert all([mi.seq_input_metadata[sl] == "ranges"
                    for sl in ["seq_a", "seq_b"]])
        assert all([mi.seq_input_metadata[sl] == "ranges_b"
                    for sl in ["seq_c"]])
        assert all([isinstance(mi.seq_input_array_trafo[sl], ReshapeDna)
                    for sl in ["seq_a", "seq_c"]])
        assert all([isinstance(mi.seq_input_array_trafo[sl], ReshapeDnaString)
                    for sl in ["seq_b"]])
def get_dataloader_factory(dataloader, source="kipoi"):
    """Loads the dataloader

    # Arguments
        dataloader (str): dataloader name
        source (str): source name

    # Returns
    - Instance of class inheriting from `kipoi.data.BaseDataLoader` (like `kipoi.data.Dataset`)
      decorated with additional attributes.

    # Methods
    - __batch_iter(batch_size, num_workers, **kwargs)__
        - Arguments
            - **batch_size**: batch size
            - **num_workers**: Number of workers to use in parallel.
            - ****kwargs**: Other kwargs specific to each dataloader
        - Yields
            - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - __batch_train_iter(cycle=True, **kwargs)__
        - Arguments
            - **cycle**: if True, cycle indefinitely
            - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
        - Yields
            - tuple of ("inputs", "targets") from the usual dict returned by `batch_iter()`
    - __batch_predict_iter(**kwargs)__
        - Arguments
            - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
        - Yields
            - "inputs" field from the usual dict returned by `batch_iter()`
    - __load_all(**kwargs)__ - load the whole dataset into memory
        - Arguments
            - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
        - Returns
            - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - **init_example()** - instantiate the dataloader with example kwargs
    - **print_args()** - print information about the required arguments

    # Appended attributes
    - **type** (str): dataloader type (class name)
    - **defined_as** (str): path and dataloader name
    - **args** (list of kipoi.specs.DataLoaderArgument): dataloader argument description
    - **info** (kipoi.specs.Info): general information about the dataloader
    - **schema** (kipoi.specs.DataloaderSchema): information about the input/output data modalities
    - **dependencies** (kipoi.specs.Dependencies): class specifying the dependencies.
      (implements `install` method for running the installation)
    - **name** (str): model name
    - **source** (str): model source
    - **source_dir** (str): local path to model source storage
    - **postprocessing** (dict): dictionary of loaded plugin specifications
    - **example_kwargs** (dict): kwargs for running the provided example
    """
    # pull the dataloader & get the dataloader directory
    source = kipoi.config.get_source(source)
    yaml_path = source.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # --------------------------------------------
    # Setup dataloader description
    with cd(dataloader_dir):  # move to the dataloader directory temporarily
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = tuple(dl.defined_as.split("::"))
        CustomDataLoader = getattr(load_module(file_path), obj_name)

    # check that dl.type is correct
    if dl.type not in AVAILABLE_DATALOADERS:
        raise ValueError("dataloader type: {0} is not in supported dataloaders: {1}".
                         format(dl.type, list(AVAILABLE_DATALOADERS.keys())))

    # check that the extractor arguments match the yaml arguments
    if not getargs(CustomDataLoader) == set(dl.args.keys()):
        raise ValueError("DataLoader arguments: \n{0}\n don't match ".format(set(getargs(CustomDataLoader))) +
                         "the specification in the dataloader.yaml file:\n{0}".
                         format(set(dl.args.keys())))

    # check that CustomDataLoader indeed inherits from the right DataLoader
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        # transform the functions into objects
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = AVAILABLE_DATALOADERS[dl.type].from_fn(CustomDataLoader)
    else:
        if not issubclass(CustomDataLoader, AVAILABLE_DATALOADERS[dl.type]):
            raise ValueError("DataLoader doesn't inherit from the specified dataloader: {0}".
                             format(AVAILABLE_DATALOADERS[dl.type].__name__))
    logger.info('successfully loaded the dataloader from {}'.
                format(os.path.normpath(os.path.join(dataloader_dir, dl.defined_as))))

    # Inherit the attributes from dl
    # TODO - make this more automatic / DRY
    #        write a method to load those things?
    CustomDataLoader.type = dl.type
    CustomDataLoader.defined_as = dl.defined_as
    CustomDataLoader.args = dl.args
    CustomDataLoader.info = dl.info
    CustomDataLoader.output_schema = dl.output_schema
    CustomDataLoader.dependencies = dl.dependencies
    CustomDataLoader.postprocessing = dl.postprocessing
    # keep it hidden?
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source = source
    # TODO - rename?
    CustomDataLoader.source_dir = dataloader_dir

    # Add init_example method
    CustomDataLoader.example_kwargs = example_kwargs(CustomDataLoader.args)

    def init_example(cls):
        return cls(**cls.example_kwargs)
    CustomDataLoader.init_example = classmethod(init_example)
    CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader
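# ---------------------------------------------------------------------------
# Minimal usage sketch, assuming the public `kipoi.get_dataloader_factory`
# entry point wraps the factory above. "Basset" is only a placeholder for any
# dataloader available from the configured source; the snippet downloads the
# dataloader and its example files, so it needs network access.
if __name__ == "__main__":
    import kipoi

    Dl = kipoi.get_dataloader_factory("Basset", source="kipoi")
    Dl.print_args()           # describe the required constructor arguments
    dl = Dl.init_example()    # instantiate with the bundled example kwargs

    # each batch is a dict with "inputs", "targets" and "metadata"
    for batch in dl.batch_iter(batch_size=4, num_workers=0):
        print({k: type(v) for k, v in batch.items()})
        break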