Esempio n. 1
0
def cli_get_example(command, raw_args):
    """Download the example files of a model's default dataloader.

    # Arguments
        command (str): name of the sub-command; has to be "get-example"
        raw_args (list of str): command-line arguments to parse

    The keyword arguments returned by `download_example` are printed
    to stdout as JSON, the status messages are sent to the logger.
    """
    assert command == "get-example"

    # command-line interface
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Get example files')
    # presumably registers the `model` and `source` arguments read below
    add_model(parser, source="kipoi")
    parser.add_argument("-o", "--output", default="example", required=False,
                        help="Output directory where to store the examples. Default: 'example'")
    args = parser.parse_args(raw_args)

    model_descr = kipoi.get_model_descr(args.model, args.source)
    model_source = kipoi.get_source(args.source)

    # resolve the description of the model's default dataloader
    default_dl = model_descr.default_dataloader
    if not isinstance(default_dl, kipoi.specs.DataLoaderImport):
        # the default dataloader is given as a path relative to the model
        dl_path = os.path.join(args.model, default_dl)
        dl_descr = kipoi.get_dataloader_descr(dl_path, source=args.source)
    else:
        # import specification: resolve it from within the model directory
        with cd(model_source.get_model_dir(args.model)):
            dl_descr = default_dl.get()

    example_kwargs = dl_descr.download_example(output_dir=args.output,
                                               dry_run=False)

    logger.info("Example files downloaded to: {}".format(args.output))
    logger.info("use the following dataloader kwargs:")
    print(json.dumps(example_kwargs))
Esempio n. 2
0
 def init_example(cls):
     """Create an instance of the class from its `example_kwargs`.

     If `source_dir` is set, the instance is constructed inside the
     `cd(source_dir)` context, otherwise it is constructed directly.
     """
     if cls.source_dir is None:
         return cls(**cls.example_kwargs)

     # the example is always initialised in the original directory
     with cd(cls.source_dir):
         return cls(**cls.example_kwargs)
Esempio n. 3
0
def cli_info(command, raw_args):
    """Print the model information and its default dataloader arguments.

    # Arguments
        command (str): name of the sub-command; has to be "info"
        raw_args (list of str): command-line arguments to parse

    If resolving the default dataloader raises an `ImportError`,
    the dataloader section is left out of the printed report.
    """
    assert command == "info"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Prints dataloader" +
                                     " keyword arguments.")
    add_model(parser)
    add_dataloader(parser, with_args=False)
    args = parser.parse_args(raw_args)

    # model description and the source it comes from
    model_descr = kipoi.get_model_descr(args.model, args.source)
    model_source = kipoi.get_source(args.source)

    # resolve the description of the model's default dataloader
    try:
        default_dl = model_descr.default_dataloader
        if not isinstance(default_dl, kipoi.specs.DataLoaderImport):
            # the default dataloader is given as a path relative to the model
            dl_path = os.path.join(args.model, default_dl)
            dl_descr = kipoi.get_dataloader_descr(dl_path,
                                                  source=args.source)
        else:
            # import specification: resolve it from within the model directory
            with cd(model_source.get_model_dir(args.model)):
                dl_descr = default_dl.get()
    except ImportError:
        # happens when kipoiseq is not installed
        dl_descr = None

    # report
    print("-" * 80)
    print("'{0}' from source '{1}'".format(str(args.model), str(args.source)))
    print("")
    print("Model information")
    print("-----------")
    print(model_descr.info.get_config_as_yaml())
    if dl_descr:
        print("Dataloader arguments")
        print("--------------------")
        dl_descr.print_args()
    print("--------------------\n")
    print("Run `kipoi get-example {} -o example` to download example files.\n".
          format(args.model))
Esempio n. 4
0
def get_dataloader(dataloader, source="kipoi"):
    """Load the dataloader and enrich it with its description

    # Arguments
        dataloader (str): dataloader name
        source (str or source object): source name. A string is resolved
            using `kipoi.config.get_source`, any other object is used
            directly as the source.

    # Returns
    - Class inheriting from `kipoi.data.BaseDataLoader` (like `kipoi.data.Dataset`)
           decorated with additional attributes.

    # Raises
    - ValueError: if the dataloader type is not specified and can't be inferred,
           if the type is not one of the available dataloaders,
           if the dataloader arguments don't match the arguments in the description
           or if the dataloader doesn't inherit from the class of the specified type.

    # Methods
    - __batch_iter(batch_size, num_workers, **kwargs)__
         - Arguments
             - **batch_size**: batch size
             - **num_workers**: Number of workers to use in parallel.
             - ****kwargs**: Other kwargs specific to each dataloader
         - Yields
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - __batch_train_iter(cycle=True, **kwargs)__
         - Arguments
             - **cycle**: if True, cycle indefinitely
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - tuple of ("inputs", "targets") from the usual dict returned by `batch_iter()`
    - __batch_predict_iter(**kwargs)__
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - "inputs" field from the usual dict returned by `batch_iter()`
    - __load_all(**kwargs)__ - load the whole dataset into memory
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Returns
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - **init_example()** - instantiate the dataloader with example kwargs
    - **print_args()** - print information about the required arguments

    # Appended attributes
    - **type** (str): dataloader type (class name)
    - **defined_as** (str): path and dataloader name
    - **args** (list of kipoi.specs.DataLoaderArgument): dataloader argument description
    - **info** (kipoi.specs.Info): general information about the dataloader
    - **schema** (kipoi.specs.DataloaderSchema): information about the input/output
            data modalities
    - **dependencies** (kipoi.specs.Dependencies): class specifying the dependencies.
          (implements `install` method for running the installation)
    - **name** (str): model name
    - **source**: source the dataloader was loaded from
          (the resolved `source` argument)
    - **source_dir** (str): dataloader directory as returned by
          `source.get_dataloader_dir(dataloader)`
    - **postprocessing** (dict): dictionary of loaded plugin specifications
    - **example_kwargs** (dict): kwargs for running the provided example
    """
    # TODO - allow source=py (load the dataloader from a python object)

    # resolve the source, pull the dataloader & get the dataloader directory
    if isinstance(source, str):
        source = kipoi.config.get_source(source)
    source.pull_dataloader(dataloader)
    dataloader_dir = source.get_dataloader_dir(dataloader)

    # --------------------------------------------
    # Load the object the description points to
    descr = source.get_dataloader_descr(dataloader)
    with cd(dataloader_dir):  # temporarily work in the dataloader directory
        if "::" not in descr.defined_as:
            # new API - the object is specified directly
            CustomDataLoader = load_obj(descr.defined_as)
        else:
            # old API - <file path>::<object name>
            file_path, obj_name = descr.defined_as.split("::")
            CustomDataLoader = getattr(load_module(file_path), obj_name)

    # download the links specified under default; the downloaded values
    # replace the default parameters of the dataloader
    override = download_default_args(
        descr.args,
        source.get_dataloader_download_dir(dataloader),
    )
    if override:
        CustomDataLoader = override_default_kwargs(CustomDataLoader, override)

    # --------------------------------------------
    # Dataloader type: infer it when it's not given, then validate it
    if descr.type is None:
        if inspect.isfunction(CustomDataLoader):
            # nothing to infer the type from
            raise ValueError(
                "Dataloaders implemented as functions/generator need to specify the type flag in dataloader.yaml"
            )
        descr.type = infer_parent_class(CustomDataLoader,
                                        AVAILABLE_DATALOADERS)
        if descr.type is None:
            raise ValueError(
                "Dataloader needs to inherit from one of the available dataloaders {}"
                .format(list(AVAILABLE_DATALOADERS)))

    if descr.type not in AVAILABLE_DATALOADERS:
        raise ValueError(
            "dataloader type: {0} is not in supported dataloaders:{1}".format(
                descr.type, list(AVAILABLE_DATALOADERS.keys())))

    # --------------------------------------------
    # The arguments of the loaded object have to match the yaml arguments
    loader_args = getargs(CustomDataLoader)
    yaml_args = set(descr.args.keys())
    if not loader_args == yaml_args:
        if "kwargs" not in loader_args:
            raise ValueError(
                "DataLoader arguments: \n{0}\n don't match ".format(
                    set(loader_args)) +
                "the specification in the dataloader.yaml file:\n{0}".format(
                    yaml_args))
        # with kwargs defined, the dataloader accepts more than its explicitly
        # defined arguments; only the explicit ones need to be described
        missing_required_args = loader_args - yaml_args - {"kwargs"}
        if missing_required_args:
            raise ValueError(
                "DataLoader arguments:\n\t{args}\n are missing required arguments:\n\t{missing}"
                .format(
                    args=yaml_args,
                    missing=missing_required_args,
                ))

    # --------------------------------------------
    # The loaded object has to agree with the class of the specified type
    base_cls = AVAILABLE_DATALOADERS[descr.type]
    if descr.type in DATALOADERS_AS_FUNCTIONS:
        # functions get transformed into objects
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = base_cls.from_fn(CustomDataLoader)
    elif not issubclass(CustomDataLoader, base_cls):
        raise ValueError(
            "DataLoader does't inherit from the specified dataloader: {0}".
            format(base_cls.__name__))

    loaded_from = os.path.normpath(
        os.path.join(dataloader_dir, descr.defined_as))
    logger.info('successfully loaded the dataloader {} from {}'.format(
        dataloader, loaded_from))

    # attach the description and the origin to the dataloader class
    enriched_cls = CustomDataLoader._add_description_factory(descr)
    enriched_cls.source = source
    enriched_cls.source_dir = dataloader_dir
    return enriched_cls