Beispiel #1
0
def test_model_loading_on_examples(example):
    """Smoke-test that an example's dataloader.yaml exposes every expected field.

    The file ``example/models/<example>/dataloader.yaml`` is parsed with
    ``DataLoaderDescription.load`` and each attribute of the resulting
    description is touched once. Nothing is asserted about the values; the
    test fails only if loading raises or an attribute is missing.

    # Arguments
        example (str): name of the example directory under `example/models/`
    """
    yaml_file = "example/models/{0}/dataloader.yaml".format(example)
    descr = DataLoaderDescription.load(yaml_file)

    # Top-level fields.
    # NOTE(review): the result of this comparison is discarded, so it only
    # checks that `type` exists - possibly an `assert` was intended; confirm
    # before tightening, since not every example may be a "Dataset".
    descr.type == "Dataset"
    descr.defined_as

    # Argument specification: inspect the first declared argument
    arg_specs = descr.args
    first_arg = six.next(six.itervalues(arg_specs))
    for field in ("doc", "type", "optional"):
        getattr(first_arg, field)

    # General information block
    info = descr.info
    for field in ("authors", "name", "version", "tags", "doc"):
        getattr(info, field)

    # Output schema: inspect the first declared input, then targets and metadata
    schema = descr.output_schema
    first_input = six.next(six.itervalues(schema.inputs))
    for field in ("shape", "special_type", "associated_metadata"):
        getattr(first_input, field)

    schema.targets
    schema.metadata
Beispiel #2
0
def get_dataloader_factory(dataloader, source="kipoi"):
    """Pull a dataloader from a source and return its class decorated with the yaml spec.

    # Arguments
        dataloader (str): dataloader name, passed to `source.pull_dataloader`
        source (str): source name, resolved with `kipoi.config.get_source`

    # Returns
        The dataloader class named by `defined_as` in dataloader.yaml (wrapped
        with `from_fn` when the declared type is function-based), with the
        fields of the parsed description attached as class attributes together
        with `_yaml_path`, `source`, `source_dir`, `example_kwargs`,
        `init_example()` and `print_args()`.

    # Raises
        ValueError: if the declared type is not supported, if the arguments of
            the loaded object differ from those listed in dataloader.yaml, or
            if the loaded class does not inherit from the declared type.
        AssertionError: if a function-based type is declared but the loaded
            object is not a plain function.
    """
    # Resolve the source, fetch the dataloader and locate its directory
    source = kipoi.config.get_source(source)
    yaml_path = source.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # The description and the module it points to are loaded while
    # temporarily inside the dataloader directory
    with cd(dataloader_dir):
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = dl.defined_as.split("::")
        CustomDataLoader = getattr(load_module(file_path), obj_name)

    # The declared type has to be one of the known dataloader types
    if dl.type not in AVAILABLE_DATALOADERS:
        supported = list(AVAILABLE_DATALOADERS.keys())
        raise ValueError("dataloader type: {0} is not in supported dataloaders:{1}".
                         format(dl.type, supported))

    # The signature of the loaded object has to agree with the yaml arguments
    loader_args = getargs(CustomDataLoader)
    yaml_args = set(dl.args.keys())
    if not loader_args == yaml_args:
        raise ValueError(
            "DataLoader arguments: \n{0}\n don't match ".format(set(loader_args)) +
            "the specification in the dataloader.yaml file:\n{0}".format(yaml_args))

    # Function-based dataloaders are converted to classes; class-based ones
    # must inherit from the declared base class
    base_cls = AVAILABLE_DATALOADERS[dl.type]
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = base_cls.from_fn(CustomDataLoader)
    elif not issubclass(CustomDataLoader, base_cls):
        raise ValueError("DataLoader does't inherit from the specified dataloader: {0}".
                         format(base_cls.__name__))

    loaded_from = os.path.normpath(os.path.join(dataloader_dir, dl.defined_as))
    logger.info('successfully loaded the dataloader from {}'.format(loaded_from))

    # Copy the description fields onto the class
    for field in ("type", "defined_as", "args", "info",
                  "output_schema", "dependencies", "postprocessing"):
        setattr(CustomDataLoader, field, getattr(dl, field))

    # Provenance of the dataloader
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source = source
    CustomDataLoader.source_dir = dataloader_dir

    # Example support: kwargs derived from the argument spec plus a
    # constructor that uses them
    CustomDataLoader.example_kwargs = example_kwargs(CustomDataLoader.args)

    def init_example(cls):
        return cls(**cls.example_kwargs)

    CustomDataLoader.init_example = classmethod(init_example)
    CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader
Beispiel #3
0
def load_component_descr(component_dir, which="model"):
    """Parse and return the yaml description of a model or dataloader.

    # Arguments
        component_dir (str): directory holding the component; made absolute
            before the component file is looked up with `get_component_file`
        which (str): either "model" or "dataloader"

    # Returns
        Result of `ModelDescription.load` or `DataLoaderDescription.load`
        on the component file, depending on `which`.

    # Raises
        ValueError: if `which` is neither "model" nor "dataloader"
    """
    from kipoi.specs import ModelDescription, DataLoaderDescription

    descr_file = get_component_file(os.path.abspath(component_dir), which, raise_err=True)

    # Loading happens while temporarily inside the component's directory
    with cd(os.path.dirname(descr_file)):
        if which == "model":
            descr_cls = ModelDescription
        elif which == "dataloader":
            descr_cls = DataLoaderDescription
        else:
            raise ValueError("which needs to be from {'model', 'dataloader'}")
        return descr_cls.load(descr_file)
Beispiel #4
0
def get_dataloader_factory(dataloader):
    """Load the dataloader described by `./model/dataloader.yaml`.

    The location is hard-coded relative to the current working directory;
    the `dataloader` argument is accepted but not used by this function.

    # Arguments
        dataloader: unused

    # Returns
        The dataloader class named by `defined_as` in the yaml file (wrapped
        with `from_fn` when the declared type is function-based), with the
        fields of the parsed description attached as class attributes together
        with `_yaml_path` and `source_dir`.

    # Raises
        ValueError: if the declared type is not supported, if the arguments of
            the loaded object differ from those listed in dataloader.yaml, or
            if the loaded class does not inherit from the declared type.
        AssertionError: if a function-based type is declared but the loaded
            object is not a plain function.
    """
    # Fixed location of the dataloader definition
    yaml_path = './model/dataloader.yaml'
    dataloader_dir = './model/'

    # The description and the module it points to are loaded while
    # temporarily inside the dataloader directory
    with cd(dataloader_dir):
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = dl.defined_as.split("::")
        CustomDataLoader = getattr(load_module(file_path), obj_name)

    # The declared type has to be one of the known dataloader types
    if dl.type not in AVAILABLE_DATALOADERS:
        supported = list(AVAILABLE_DATALOADERS.keys())
        raise ValueError(
            "dataloader type: {0} is not in supported dataloaders:{1}".format(
                dl.type, supported))

    # The signature of the loaded object has to agree with the yaml arguments
    loader_args = getargs(CustomDataLoader)
    yaml_args = set(dl.args.keys())
    if not loader_args == yaml_args:
        raise ValueError(
            "DataLoader arguments: \n{0}\n don't match ".format(set(loader_args)) +
            "the specification in the dataloader.yaml file:\n{0}".format(yaml_args))

    # Function-based dataloaders are converted to classes; class-based ones
    # must inherit from the declared base class
    base_cls = AVAILABLE_DATALOADERS[dl.type]
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = base_cls.from_fn(CustomDataLoader)
    elif not issubclass(CustomDataLoader, base_cls):
        raise ValueError(
            "DataLoader does't inherit from the specified dataloader: {0}".
            format(base_cls.__name__))

    # Copy the description fields onto the class
    for field in ("type", "defined_as", "args", "info",
                  "output_schema", "dependencies", "postprocessing"):
        setattr(CustomDataLoader, field, getattr(dl, field))

    # Provenance of the dataloader
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source_dir = dataloader_dir
    # print_args is not attached by this variant.

    return CustomDataLoader
Beispiel #5
0
def get_dataloader_factory(dataloader, source="kipoi"):
    """Load a dataloader class from a source.

    # Arguments
        dataloader (str): dataloader name
        source (str): source name

    # Returns
    - Class inheriting from one of the supported dataloader base classes
      (per the original documentation: `kipoi.data.BaseDataLoader`, e.g.
      `kipoi.data.Dataset`), decorated with the additional attributes listed
      below. Function-based dataloaders are first converted with `from_fn`.

    # Raises
    - **ValueError**: unsupported dataloader type, mismatch between the
      arguments of the loaded object and those in dataloader.yaml, or a class
      that does not inherit from the declared dataloader type
    - **AssertionError**: a function-based type was declared but the loaded
      object is not a plain function

    # Methods
    The iteration methods are inherited from the dataloader base class and
    are not defined in this function.

    - __batch_iter(batch_size, num_workers, **kwargs)__
         - Arguments
             - **batch_size**: batch size
             - **num_workers**: Number of workers to use in parallel.
             - ****kwargs**: Other kwargs specific to each dataloader
         - Yields
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - __batch_train_iter(cycle=True, **kwargs)__
         - Arguments
             - **cycle**: if True, cycle indefinitely
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - tuple of ("inputs", "targets") from the usual dict returned by `batch_iter()`
    - __batch_predict_iter(**kwargs)__
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - "inputs" field from the usual dict returned by `batch_iter()`
    - __load_all(**kwargs)__ - load the whole dataset into memory
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Returns
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - **init_example()** - instantiate the dataloader with `example_kwargs`
    - **print_args()** - print information about the required arguments

    # Appended attributes
    - **type** (str): dataloader type (class name)
    - **defined_as** (str): `<file path>::<object name>` of the dataloader
    - **args**: mapping from argument name to its description
          (`kipoi.specs.DataLoaderArgument` per the original documentation)
    - **info** (kipoi.specs.Info): general information about the dataloader
    - **output_schema** (kipoi.specs.DataloaderSchema): information about the
          input/output data modalities
    - **dependencies** (kipoi.specs.Dependencies): class specifying the dependencies
    - **postprocessing** (dict): dictionary of loaded plugin specifications
    - **source**: source object returned by `kipoi.config.get_source`
    - **source_dir** (str): local directory containing dataloader.yaml
    - **_yaml_path** (str): path of the pulled dataloader.yaml
    - **example_kwargs** (dict): kwargs for running the provided example
    """
    # Resolve the source, fetch the dataloader and locate its directory
    source = kipoi.config.get_source(source)
    yaml_path = source.pull_dataloader(dataloader)
    dataloader_dir = os.path.dirname(yaml_path)

    # The description and the module it points to are loaded while
    # temporarily inside the dataloader directory
    with cd(dataloader_dir):
        dl = DataLoaderDescription.load(os.path.basename(yaml_path))
        file_path, obj_name = dl.defined_as.split("::")
        CustomDataLoader = getattr(load_module(file_path), obj_name)

    # The declared type has to be one of the known dataloader types
    if dl.type not in AVAILABLE_DATALOADERS:
        supported = list(AVAILABLE_DATALOADERS.keys())
        raise ValueError("dataloader type: {0} is not in supported dataloaders:{1}".
                         format(dl.type, supported))

    # The signature of the loaded object has to agree with the yaml arguments
    loader_args = getargs(CustomDataLoader)
    yaml_args = set(dl.args.keys())
    if not loader_args == yaml_args:
        raise ValueError(
            "DataLoader arguments: \n{0}\n don't match ".format(set(loader_args)) +
            "the specification in the dataloader.yaml file:\n{0}".format(yaml_args))

    # Function-based dataloaders are converted to classes; class-based ones
    # must inherit from the declared base class
    base_cls = AVAILABLE_DATALOADERS[dl.type]
    if dl.type in DATALOADERS_AS_FUNCTIONS:
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = base_cls.from_fn(CustomDataLoader)
    elif not issubclass(CustomDataLoader, base_cls):
        raise ValueError("DataLoader does't inherit from the specified dataloader: {0}".
                         format(base_cls.__name__))

    loaded_from = os.path.normpath(os.path.join(dataloader_dir, dl.defined_as))
    logger.info('successfully loaded the dataloader from {}'.format(loaded_from))

    # Copy the description fields onto the class
    for field in ("type", "defined_as", "args", "info",
                  "output_schema", "dependencies", "postprocessing"):
        setattr(CustomDataLoader, field, getattr(dl, field))

    # Provenance of the dataloader
    CustomDataLoader._yaml_path = yaml_path
    CustomDataLoader.source = source
    CustomDataLoader.source_dir = dataloader_dir

    # Example support: kwargs derived from the argument spec plus a
    # constructor that uses them
    CustomDataLoader.example_kwargs = example_kwargs(CustomDataLoader.args)

    def init_example(cls):
        return cls(**cls.example_kwargs)

    CustomDataLoader.init_example = classmethod(init_example)
    CustomDataLoader.print_args = classmethod(print_dl_kwargs)

    return CustomDataLoader