def cli_get_example(command, raw_args):
    """Downloads the example files to the desired directory """
    assert command == "get-example"
    # Build the argument parser for this sub-command.
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Get example files')
    add_model(parser, source="kipoi")
    parser.add_argument("-o", "--output", default="example", required=False,
                        help="Output directory where to store the examples. Default: 'example'")
    parsed = parser.parse_args(raw_args)
    # --------------------------------------------
    model_descr = kipoi.get_model_descr(parsed.model, parsed.source)
    model_source = kipoi.get_source(parsed.source)
    # Resolve the default dataloader description: it is either imported from a
    # package (DataLoaderImport, resolved inside the model directory) or a
    # path relative to the model directory.
    if isinstance(model_descr.default_dataloader, kipoi.specs.DataLoaderImport):
        with cd(model_source.get_model_dir(parsed.model)):
            dataloader_descr = model_descr.default_dataloader.get()
    else:
        dataloader_descr = kipoi.get_dataloader_descr(
            os.path.join(parsed.model, model_descr.default_dataloader),
            source=parsed.source)
    # Download the example files and report the dataloader kwargs to use them.
    example_kwargs = dataloader_descr.download_example(output_dir=parsed.output,
                                                       dry_run=False)
    logger.info("Example files downloaded to: {}".format(parsed.output))
    logger.info("use the following dataloader kwargs:")
    print(json.dumps(example_kwargs))
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Merge the dependencies of the requested models and dataloaders.

    Args:
        models: list of model names to resolve.
        dataloaders: optional list of dataloader names. When None or empty,
            the default dataloader of each model is used instead.
        source: kipoi source name the models/dataloaders live in.
        vep: if True, also include the variant-effect-prediction dependencies.
        gpu: if True, convert dependencies to their gpu-compatible variants.

    Returns:
        Dependencies: the merged dependency specification.
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)
        model_descr = kipoi.get_model_descr(parsed_model, parsed_source)
        deps = deps.merge(model_descr.dependencies)
        # No explicit dataloaders requested -> pull in the model's default one.
        # (`not dataloaders` covers both None and an empty list; the original
        # `dataloaders is None or not dataloaders` was redundant.)
        if not dataloaders:
            dataloader = os.path.normpath(
                os.path.join(parsed_model, model_descr.default_dataloader))
            logger.info("Inferred dataloader name: {0} from".format(dataloader) +
                        " the model.")
            dataloader_descr = kipoi.get_dataloader_descr(dataloader, parsed_source)
            deps = deps.merge(dataloader_descr.dependencies)
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(source, dataloader)
            dataloader_descr = kipoi.get_dataloader_descr(parsed_dataloader,
                                                          parsed_source)
            deps = deps.merge(dataloader_descr.dependencies)
    # Kipoi itself is always required to run the models.
    deps = KIPOI_DEPS.merge(deps)
    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)
    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()
    # `platform` is `sys.platform` imported at module level.
    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()
    return deps
def get_dataloader_descr(model_name, source):
    """Return the description of `model_name`'s default dataloader.

    Handles both styles of `default_dataloader`: a relative path string
    (loaded from the model directory) or an import specification whose
    `.get()` must be called from within the model directory.
    """
    from kipoi.utils import cd
    model_source = kipoi.get_source(source)
    model_descr = kipoi.get_model_descr(model_name, source=source)
    if not isinstance(model_descr.default_dataloader, str):
        # Imported dataloader: resolve it relative to the model directory.
        with cd(model_source.get_model_dir(model_name)):
            return model_descr.default_dataloader.get()
    # Path-style dataloader: delegate to kipoi with the joined path.
    return kipoi.get_dataloader_descr(
        os.path.join(model_name, model_descr.default_dataloader),
        source=source)
def test_load_model(example):
    """Smoke-test: the dataloader factory for `example` loads and exposes
    its public attributes."""
    example_dir = "example/models/{0}".format(example)
    # These examples rely on python-3-only features.
    if example in {"rbp", "iris_model_template"} and sys.version_info[0] == 2:
        pytest.skip("example not supported on python 2 ")
    if INSTALL_REQ:
        install_dataloader_requirements(example_dir, "dir")
    factory = kipoi.get_dataloader_factory(example_dir, source="dir")
    # Touch every public attribute to make sure it is populated.
    for attr_name in ("type", "defined_as", "args", "info", "output_schema",
                      "source", "batch_iter", "load_all"):
        getattr(factory, attr_name)
    factory.print_args()
    kipoi.get_dataloader_descr(example_dir, source="dir").print_kwargs()
def model_list(model_name):
    """Models list view: renders a model detail page, a filtered model list,
    or redirects to the group list, depending on what `model_name` names."""
    source = current_app.config['SOURCE']
    df = get_model_list(source)
    model_name = model_name.rstrip('/')
    vtype_path = get_view(model_name, df)
    if vtype_path is None:
        # Unknown path -> nothing to render (404 case).
        return
    vtype, path = vtype_path
    if vtype == "model":
        # Model and dataloader info retrieved from kipoi.
        model = kipoi.get_model_descr(model_name, source=source)
        dataloader = kipoi.get_dataloader_descr(
            os.path.join(model_name, model.default_dataloader), source=source)
        title = model_name.split('/')
        code_snippets = get_snippets(model_name, source)
        return render_template("models/model_details.html",
                               model_name=model_name,
                               model=model,
                               dataloader=dataloader,
                               title=title,
                               code_snippets=code_snippets)
    elif vtype == "model_list":
        # Normal model list view on a subsetted table.
        model_df = get_model_list(source)
        model_df = model_df[model_df.model.str.contains("^" + path + "/")]
        filtered_models = model_df.to_dict(orient='records')
        return render_template("models/index.html", models=filtered_models)
    elif vtype == "group_list":
        # Redirect to the group list.
        return redirect(url_for('models.list_groups', group_name=path))
def get_dataloader_descr(model_name, source='kipoi'):
    """Not yet nicely integrated with Kipoi

    Args:
        model_name: model name as a string

    Returns:
        (model output schema, list of required files)
    """
    # Arguments to hide for specific imported dataloaders.
    dl_skip_arguments = {
        "kipoiseq.dataloaders.SeqIntervalDl":
            ['alphabet_axis', 'dummy_axis', 'alphabet', 'dtype']
    }
    md = kipoi.get_model_descr(model_name)
    src = kipoi.get_source(source)
    # Resolve the dataloader description.
    if isinstance(md.default_dataloader, str):
        # Path-style dataloader relative to the model directory.
        dataloader = kipoi.get_dataloader_descr(
            os.path.join(model_name, md.default_dataloader), source=source)
        dataloader_name = md.default_dataloader
        dataloader_args = dataloader.args
    else:
        # Imported dataloader: resolve it from within the model directory.
        with cd(src.get_model_dir(model_name)):
            dataloader = md.default_dataloader.get()
        dataloader_name = md.default_dataloader.defined_as
        # NOTE(review): dataloader_name/dataloader_args are computed but not
        # part of the return value — kept for parity with the original code.
        dataloader_args = OrderedDict([
            (k, v) for k, v in dataloader.args.items()
            if k not in list(md.default_dataloader.default_args) +
            dl_skip_arguments.get(dataloader_name, [])
        ])
        if md.default_dataloader.defined_as == 'kipoiseq.dataloaders.SeqIntervalDl':
            # HACK - cleanup some values for SeqIntervalDl
            if md.default_dataloader.default_args.get("ignore_targets", False):
                dataloader_args.pop('label_dtype', None)
    # Collect the file arguments the dataloader needs.
    required_files = []
    if 'fasta_file' in dataloader.args:
        required_files.append("fasta_file")
    if 'gtf_file' in dataloader.args:
        required_files.append("gtf_file")
    return get_output_schema(md.schema.targets), required_files
def cli_info(command, raw_args):
    """CLI interface: print model information and dataloader keyword arguments."""
    assert command == "info"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Prints dataloader keyword arguments.")
    add_model(parser)
    add_dataloader(parser, with_args=False)
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    # Load the model description and its source.
    model_descr = kipoi.get_model_descr(args.model, args.source)
    model_source = kipoi.get_source(args.source)
    # Load the default dataloader description. If the package providing an
    # imported dataloader (e.g. kipoiseq) is missing, fall back to None.
    try:
        if isinstance(model_descr.default_dataloader, kipoi.specs.DataLoaderImport):
            with cd(model_source.get_model_dir(args.model)):
                dataloader_descr = model_descr.default_dataloader.get()
        else:
            dataloader_descr = kipoi.get_dataloader_descr(
                os.path.join(args.model, model_descr.default_dataloader),
                source=args.source)
    except ImportError:
        dataloader_descr = None
    print("-" * 80)
    print("'{0}' from source '{1}'".format(str(args.model), str(args.source)))
    print("")
    print("Model information")
    print("-----------")
    print(model_descr.info.get_config_as_yaml())
    if dataloader_descr:
        print("Dataloader arguments")
        print("--------------------")
        dataloader_descr.print_args()
    print("--------------------\n")
    print("Run `kipoi get-example {} -o example` to download example files.\n".
          format(args.model))
def merge_deps(models,
               dataloaders=None,
               source="kipoi",
               vep=False,
               interpret=False,
               gpu=False):
    """Setup the dependencies for the given models and dataloaders.

    Args:
        models: list of model names; may contain handcrafted "special env"
            entries (split out via `split_models_special_envs`) and model
            groups (expanded via `list_subcomponents`).
        dataloaders: optional list of dataloader names. When None or empty,
            the default dataloader of each model is used instead.
        source: kipoi source name the models/dataloaders live in.
        vep: include the variant-effect-prediction dependencies.
        interpret: include the model-interpretation dependencies.
        gpu: convert dependencies to their gpu-compatible variants.

    Returns:
        Dependencies: the merged dependency specification.

    Raises:
        ValueError: if a special-env yaml, model or dataloader is not found.
    """
    special_envs, only_models = split_models_special_envs(models)
    deps = Dependencies()
    # Treat the handcrafted environments differently: their dependencies are
    # read directly from a yaml file shipped with the source.
    for special_env in special_envs:
        from related import from_yaml
        logger.info("Loading environment definition: {0}".format(special_env))
        yaml_path = os.path.join(
            kipoi.get_source(source).local_path, special_env + ".yaml")
        if not os.path.exists(yaml_path):
            raise ValueError(
                "Environment definition file {0} not found in source {1}".
                format(yaml_path, source))
        with open(yaml_path, "r", encoding="utf-8") as fh:
            special_env_deps = Dependencies.from_env_dict(from_yaml(fh))
        deps = deps.merge(special_env_deps)
    for model in only_models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)
        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))
        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(sub_model)
            deps = deps.merge(model_descr.dependencies)
            # No explicit dataloaders requested -> use the model's default.
            # (`not dataloaders` covers both None and an empty list.)
            if not dataloaders:
                if isinstance(model_descr.default_dataloader, DataLoaderImport):
                    # Dataloader specified by an import statement.
                    deps = deps.merge(model_descr.default_dataloader.dependencies)
                    if model_descr.default_dataloader.parse_dependencies:
                        # Add dependencies specified in the yaml file —
                        # load from the dataloader description if possible.
                        try:
                            with cd(model_dir):
                                dataloader_descr = model_descr.default_dataloader.get()
                            deps = deps.merge(dataloader_descr.dependencies)
                        except ImportError:
                            # Package providing the dataloader is not installed yet.
                            if model_descr.default_dataloader.defined_as.startswith("kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.defined_as))
                else:
                    # Path-style dataloader relative to the (sub-)model.
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from".format(
                        dataloader) + " the model.")
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))
            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)
    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)
    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)
    if interpret:
        # add interpret dependencies (original comment wrongly said "vep")
        logger.info("Adding the interpret dependencies")
        deps = INTERPRET_DEPS.merge(deps)
    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()
    # `platform` is `sys.platform` imported at module level.
    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()
    return deps
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Setup the dependencies for the given models and dataloaders.

    Args:
        models: list of model names; may name model groups, which are
            expanded into their sub-models via `list_subcomponents`.
        dataloaders: optional list of dataloader names. When None or empty,
            the default dataloader of each model is used instead.
        source: kipoi source name the models/dataloaders live in.
        vep: include the variant-effect-prediction dependencies.
        gpu: convert dependencies to their gpu-compatible variants.

    Returns:
        Dependencies: the merged dependency specification.

    Raises:
        ValueError: if a model or dataloader is not found in `source`.
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)
        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))
        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(sub_model)
            deps = deps.merge(model_descr.dependencies)
            # No explicit dataloaders requested -> use the model's default.
            # (`not dataloaders` covers both None and an empty list.)
            if not dataloaders:
                if isinstance(model_descr.default_dataloader, DataLoaderImport):
                    # Dataloader specified by an import statement.
                    deps = deps.merge(model_descr.default_dataloader.dependencies)
                    if model_descr.default_dataloader.parse_dependencies:
                        # Add dependencies specified in the yaml file —
                        # load from the dataloader description if possible.
                        try:
                            with cd(model_dir):
                                dataloader_descr = model_descr.default_dataloader.get()
                            deps = deps.merge(dataloader_descr.dependencies)
                        except ImportError:
                            # Package providing the dataloader is not installed yet.
                            if model_descr.default_dataloader.defined_as.startswith("kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                # warning(): Logger.warn is deprecated.
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.defined_as))
                else:
                    # Path-style dataloader relative to the (sub-)model.
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from".format(
                        dataloader) + " the model.")
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))
            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)
    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)
    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)
    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()
    # `platform` is `sys.platform` imported at module level.
    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()
    return deps
def get_dataloader_descr(model_name):
    """Return the description of `model_name`'s default dataloader
    (assumed to be a path relative to the model directory)."""
    default_dl = kipoi.get_model_descr(model_name).default_dataloader
    return kipoi.get_dataloader_descr(os.path.join(model_name, default_dl))
def test__get_dl_bed_fields():
    """_get_dl_bed_fields reports the bed-like columns of the rbp dataloader."""
    dl_descr = kipoi.get_dataloader_descr("examples/rbp/", source="dir")
    bed_fields = kipoi.postprocessing.variant_effects.utils.generic._get_dl_bed_fields(
        dl_descr)
    assert bed_fields == ['intervals_file']
def model_list(model_name):
    """Models list view.

    Depending on what `model_name` resolves to, renders the model detail
    page, a filtered model-list page, or redirects to the group list.

    Fixes over the previous version: README files are opened with a context
    manager (the old `open(...).read()` leaked file handles), and the
    duplicated `except IOError`/`except ValueError` arms are merged.
    """
    from kipoi.utils import cd
    source = current_app.config['SOURCE']
    df = get_model_list(source)
    model_name = model_name.rstrip('/')
    vtype_path = get_view(model_name, df)
    if vtype_path is None:
        # run 404
        return
        # pass
    else:
        vtype, path = vtype_path
    # render the model detail view
    if vtype == "model":
        # Model info retrieved from kipoi
        model = kipoi.get_model_descr(model_name, source=source)
        src = kipoi.get_source(source)
        model_dir = kipoi.utils.relative_path(src.get_model_dir(model_name),
                                              src.local_path)
        model_url = github_dir_tree(src.remote_url, model_dir)
        # Model dataloaders info retrieved from kipoi
        if model.default_dataloader:
            if isinstance(model.default_dataloader, str):
                # Path-style dataloader relative to the model directory.
                dl_rel_path = True
                dataloader = kipoi.get_dataloader_descr(os.path.join(
                    model_name, model.default_dataloader), source=source)
                dataloader_name = model.default_dataloader
                dataloader_args = dataloader.args
            else:
                # Imported dataloader: resolve from within the model directory.
                dl_rel_path = False
                with cd(src.get_model_dir(model_name)):
                    dataloader = model.default_dataloader.get()
                dataloader_name = model.default_dataloader.defined_as
                # Hide default and dataloader-specific arguments
                # (dl_skip_arguments is a module-level table).
                dataloader_args = OrderedDict([
                    (k, v) for k, v in dataloader.args.items()
                    if k not in list(model.default_dataloader.default_args) +
                    dl_skip_arguments.get(dataloader_name, [])
                ])
                if model.default_dataloader.defined_as == 'kipoiseq.dataloaders.SeqIntervalDl':
                    # HACK - cleanup some values for SeqIntervalDl
                    if model.default_dataloader.default_args.get(
                            "ignore_targets", False):
                        dataloader_args.pop('label_dtype', None)
        else:
            dataloader = None
            dataloader_name = ''
            dataloader_args = {}
            dl_rel_path = False
        title = model_name.split('/')
        # obtain snippets
        code_snippets = get_snippets(model_name, source)
        if model_name == "SeqVec/embedding2structure":
            code_snippets["docker"] = ''
            code_snippets["singularity"] = ''
            code_snippets["cli"] = ''
            code_snippets["python"] = ''
            code_snippets["R"] = ''
        # reading the README content
        readme_dir = kipoi.get_source(
            current_app.config['SOURCE']).get_model_dir(model_name)
        try:
            # python doesnt handle case sensetive path. so:
            filelists = os.listdir(readme_dir)
            readmeindx = [x.lower() for x in filelists].index("readme.md")
            with open(os.path.join(readme_dir, filelists[readmeindx]),
                      "r") as fh:
                filecontent = fh.read()
            readmecontent = render_markdown(filecontent)
            # remove the title because already there is a title
            readmecontent = re.sub("<[hH][12]>.*</[hH][12]>", "",
                                   readmecontent, count=1)
            readmecontent = Markup(readmecontent)
        except (IOError, ValueError):
            # IOError: README unreadable; ValueError: no readme.md present.
            readmecontent = ""
        return render_template(
            "models/model_details.html",
            model_name=model_name,
            model=model,
            contributors=update_contributors(model.info.contributors,
                                             model.info.authors),
            authors=update_authors(model.info.authors, model.info.cite_as),
            dataloader=dataloader,
            dataloader_args=dataloader_args,
            dataloader_name=dataloader_name,
            model_url=model_url,
            dl_rel_path=dl_rel_path,
            cite_as=update_cite_as(model.info.cite_as),
            title=title,
            code_snippets=code_snippets,
            readmecontent=readmecontent,
            model_postprocessing=available_postprocessing(model_name))
    # run the normal model list view on a subsetted table
    elif vtype == "model_list":
        model_df = get_model_list(source)
        # TODO - augment the results
        # Filter the results
        model_df = model_df[model_df.model.str.contains("^" + path + "/")]
        filtered_models = model_df.to_dict(orient='records')
        filtered_models = [update_cite_as_dict(x) for x in filtered_models]
        # update contributors
        filtered_models = [
            update_contributors_as_dict(x) for x in filtered_models
        ]
        # update authors
        filtered_models = [update_authors_as_dict(x) for x in filtered_models]
        # get readme file
        readme_dir = os.path.join(
            kipoi.get_source(current_app.config['SOURCE']).local_path,
            model_name)
        try:
            filelists = os.listdir(readme_dir)
            readmeindx = [x.lower() for x in filelists].index("readme.md")
            with open(os.path.join(readme_dir, filelists[readmeindx]),
                      "r") as fh:
                filecontent = fh.read()
            readmecontent = render_markdown(filecontent)
        except (IOError, ValueError):
            readmecontent = ""
        return render_template("models/index.html",
                               models=filtered_models,
                               readmecontent=readmecontent)
    # redirect to the group list
    elif vtype == "group_list":
        return redirect(url_for('models.list_groups', group_name=path))
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Setup the dependencies for the given models and dataloaders.

    Args:
        models: list of model names; may name model groups, which are
            expanded into their sub-models via `list_subcomponents`.
        dataloaders: optional list of dataloader names. When None or empty,
            the default dataloader of each model is used instead.
        source: kipoi source name the models/dataloaders live in.
        vep: include the variant-effect-prediction dependencies.
        gpu: convert dependencies to their gpu-compatible variants.

    Returns:
        Dependencies: the merged dependency specification.

    Raises:
        ValueError: if a model or dataloader is not found in `source`.
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)
        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))
        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            deps = deps.merge(model_descr.dependencies)
            # No explicit dataloaders requested -> use the model's default.
            # (`not dataloaders` covers both None and an empty list.)
            if not dataloaders:
                dataloader = os.path.normpath(
                    os.path.join(sub_model, model_descr.default_dataloader))
                logger.info(
                    "Inferred dataloader name: {0} from".format(dataloader) +
                    " the model.")
                dataloader_descr = kipoi.get_dataloader_descr(
                    dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)
    if dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))
            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)
    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)
    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)
    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()
    # `platform` is `sys.platform` imported at module level.
    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()
    return deps