def cli_get_example(command, raw_args):
    """Download the example files of a model into the chosen directory.

    Parses `raw_args` (model, source and `-o/--output`), resolves the
    model's default dataloader description, asks it to download its example
    files into the output directory and finally prints the resulting
    dataloader kwargs to stdout as JSON.

    # Arguments
        command (str): sub-command name; has to be "get-example"
        raw_args (list of str): command-line arguments left to parse
    """
    assert command == "get-example"

    # --- argument parsing ---
    parser = argparse.ArgumentParser(
        'kipoi {}'.format(command),
        description='Get example files',
    )
    add_model(parser, source="kipoi")
    parser.add_argument(
        "-o", "--output",
        default="example",
        required=False,
        help="Output directory where to store the examples. Default: 'example'",
    )
    args = parser.parse_args(raw_args)

    # --- resolve the default dataloader description ---
    model_descr = kipoi.get_model_descr(args.model, args.source)
    model_source = kipoi.get_source(args.source)

    default_dl = model_descr.default_dataloader
    if not isinstance(default_dl, kipoi.specs.DataLoaderImport):
        # the default dataloader is given as a path relative to the model
        dl_path = os.path.join(args.model, default_dl)
        dl_descr = kipoi.get_dataloader_descr(dl_path, source=args.source)
    else:
        # import specification: resolve it from within the model directory
        with cd(model_source.get_model_dir(args.model)):
            dl_descr = default_dl.get()

    # --- download and report ---
    example_kwargs = dl_descr.download_example(output_dir=args.output,
                                               dry_run=False)
    logger.info("Example files downloaded to: {}".format(args.output))
    logger.info("use the following dataloader kwargs:")
    print(json.dumps(example_kwargs))
def init_example(cls):
    """Build an instance of `cls` from its `example_kwargs`.

    When `cls.source_dir` is set, the instance is created while the working
    directory is temporarily switched to `cls.source_dir`; otherwise it is
    created in the current working directory.

    # Returns
        instance of `cls` constructed as `cls(**cls.example_kwargs)`
    """
    if cls.source_dir is None:
        return cls(**cls.example_kwargs)

    # always init the example in the original directory
    with cd(cls.source_dir):
        return cls(**cls.example_kwargs)
def cli_info(command, raw_args):
    """CLI interface to print model information and dataloader arguments.

    Parses `raw_args`, loads the model description and tries to load the
    description of the model's default dataloader. The model info is always
    printed; the dataloader arguments are printed only when the dataloader
    description could be loaded. If loading it raises an `ImportError`, a
    warning is logged and the dataloader section is omitted.

    # Arguments
        command (str): sub-command name; has to be "info"
        raw_args (list of str): command-line arguments left to parse
    """
    # local import keeps this block self-contained
    import logging

    assert command == "info"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Prints dataloader" +
                                     " keyword arguments.")
    add_model(parser)
    # NOTE(review): the options registered here are not read below - the
    # model's default dataloader is always the one displayed. Confirm intent.
    add_dataloader(parser, with_args=False)
    args = parser.parse_args(raw_args)

    # --------------------------------------------
    # load model & dataloader
    md = kipoi.get_model_descr(args.model, args.source)
    src = kipoi.get_source(args.source)

    # load the default dataloader
    try:
        if isinstance(md.default_dataloader, kipoi.specs.DataLoaderImport):
            with cd(src.get_model_dir(args.model)):
                dl_descr = md.default_dataloader.get()
        else:
            # load from directory
            # attach the default dataloader already to the model
            dl_descr = kipoi.get_dataloader_descr(
                os.path.join(args.model, md.default_dataloader),
                source=args.source)
    except ImportError as e:
        # if kipoiseq is not installed you get an ImportError; stay
        # best-effort but tell the user why the section is missing
        logging.getLogger(__name__).warning(
            "Could not load the default dataloader ({}). "
            "Dataloader arguments will not be shown.".format(e))
        dl_descr = None

    print("-" * 80)
    print("'{0}' from source '{1}'".format(str(args.model), str(args.source)))
    print("")
    print("Model information")
    print("-----------")
    print(md.info.get_config_as_yaml())
    if dl_descr:
        print("Dataloader arguments")
        print("--------------------")
        dl_descr.print_args()
    print("--------------------\n")
    print("Run `kipoi get-example {} -o example` to download example files.\n".
          format(args.model))
def get_dataloader(dataloader, source="kipoi"):
    """Loads the dataloader

    # Arguments
        dataloader (str): dataloader name
        source (str): source name

    # Returns
    - Instance of class inheriting from `kipoi.data.BaseDataLoader` (like `kipoi.data.Dataset`)
           decorated with additional attributes.

    # Methods
    - __batch_iter(batch_size, num_workers, **kwargs)__
         - Arguments
             - **batch_size**: batch size
             - **num_workers**: Number of workers to use in parallel.
             - ****kwargs**: Other kwargs specific to each dataloader
         - Yields
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - __batch_train_iter(cycle=True, **kwargs)__
         - Arguments
             - **cycle**: if True, cycle indefinitely
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - tuple of ("inputs", "targets") from the usual dict returned by `batch_iter()`
    - __batch_predict_iter(**kwargs)__
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Yields
             - "inputs" field from the usual dict returned by `batch_iter()`
    - __load_all(**kwargs)__ - load the whole dataset into memory
         - Arguments
             - ****kwargs**: Kwargs passed to `batch_iter()` like `batch_size`
         - Returns
             - `dict` with `"inputs"`, `"targets"` and `"metadata"`
    - **init_example()** - instantiate the dataloader with example kwargs
    - **print_args()** - print information about the required arguments

    # Appended attributes
    - **type** (str): dataloader type (class name)
    - **defined_as** (str): path and dataloader name
    - **args** (list of kipoi.specs.DataLoaderArgument): dataloader argument description
    - **info** (kipoi.specs.Info): general information about the dataloader
    - **schema** (kipoi.specs.DataloaderSchema): information about the input/output
            data modalities
    - **dependencies** (kipoi.specs.Dependencies): class specifying the dependencies.
          (implements `install` method for running the installation)
    - **name** (str): model name
    - **source** (str): model source
    - **source_dir** (str): local path to model source storage
    - **postprocessing** (dict): dictionary of loaded plugin specifications
    - **example_kwargs** (dict): kwargs for running the provided example
    """
    # TODO - allow source=py (load the dataloader directly from a python object)

    # Resolve the source object, fetch the dataloader and locate its directory
    if isinstance(source, str):
        source = kipoi.config.get_source(source)
    source.pull_dataloader(dataloader)
    dataloader_dir = source.get_dataloader_dir(dataloader)

    # --------------------------------------------
    # Read the dataloader description and import the object it points to
    descr = source.get_dataloader_descr(dataloader)

    with cd(dataloader_dir):
        # only inside the dataloader directory while importing the object
        if "::" not in descr.defined_as:
            # new API - the object is specified directly
            CustomDataLoader = load_obj(descr.defined_as)
        else:
            # old API - "<file path>::<object name>"
            file_path, obj_name = descr.defined_as.split("::")
            CustomDataLoader = getattr(load_module(file_path), obj_name)

    # Download links given as argument defaults; a non-empty result replaces
    # the corresponding default kwargs of the dataloader
    override = download_default_args(
        descr.args, source.get_dataloader_download_dir(dataloader))
    if override:
        CustomDataLoader = override_default_kwargs(CustomDataLoader, override)

    # Infer the dataloader type when the description does not state it
    if descr.type is None:
        if inspect.isfunction(CustomDataLoader):
            raise ValueError(
                "Dataloaders implemented as functions/generator need to specify the type flag in dataloader.yaml"
            )
        descr.type = infer_parent_class(CustomDataLoader,
                                        AVAILABLE_DATALOADERS)
        if descr.type is None:
            raise ValueError(
                "Dataloader needs to inherit from one of the available dataloaders {}"
                .format(list(AVAILABLE_DATALOADERS)))

    # The (given or inferred) type has to be a supported one
    if descr.type not in AVAILABLE_DATALOADERS:
        raise ValueError(
            "dataloader type: {0} is not in supported dataloaders:{1}".format(
                descr.type, list(AVAILABLE_DATALOADERS.keys())))

    # The arguments of the object have to agree with those listed in the yaml
    loader_args = getargs(CustomDataLoader)
    yaml_args = set(descr.args.keys())
    if loader_args != yaml_args:
        if "kwargs" not in loader_args:
            raise ValueError(
                "DataLoader arguments: \n{0}\n don't match ".format(
                    set(loader_args)) +
                "the specification in the dataloader.yaml file:\n{0}".format(
                    yaml_args))
        # with kwargs the dataloader accepts more than its explicit
        # arguments, so only the explicit ones must appear in the yaml
        missing_required_args = loader_args - yaml_args - {"kwargs"}
        if missing_required_args:
            raise ValueError(
                "DataLoader arguments:\n\t{args}\n are missing required arguments:\n\t{missing}"
                .format(
                    args=yaml_args,
                    missing=missing_required_args,
                ))

    # Function-style dataloaders get wrapped into objects; class-style ones
    # have to inherit from the class registered for their type
    base_cls = AVAILABLE_DATALOADERS[descr.type]
    if descr.type in DATALOADERS_AS_FUNCTIONS:
        assert isinstance(CustomDataLoader, types.FunctionType)
        CustomDataLoader = base_cls.from_fn(CustomDataLoader)
    elif not issubclass(CustomDataLoader, base_cls):
        raise ValueError(
            "DataLoader does't inherit from the specified dataloader: {0}".
            format(base_cls.__name__))

    loaded_from = os.path.normpath(
        os.path.join(dataloader_dir, descr.defined_as))
    logger.info('successfully loaded the dataloader {} from {}'.format(
        dataloader, loaded_from))

    # Attach the description and the source information to the class
    Dl = CustomDataLoader._add_description_factory(descr)
    Dl.source = source
    Dl.source_dir = dataloader_dir
    return Dl