def install_dataloader_requirements(dataloader, source="kipoi"):
    """Install dataloader dependencies

    # Arguments
        dataloader (str): dataloader name
        source (str): model source
    """
    # dataloader descriptions live in the same registry as model descriptions,
    # so the lookup goes through get_model_descr
    kipoi.get_source(source).get_model_descr(dataloader).dependencies.install()
def test_generate_env_db_entry():
    """Test generate_env_db_entry.

    Tests the general case and whether the automatic generation of
    sub-models works, also in combination with a clearly defined model.
    """
    import yaml
    import kipoi
    import time
    from kipoi.cli.parser_utils import parse_source_name
    kwargs = {
        "dataloader": [],
        "env": "test_env",
        "gpu": True,
        "model": None,
        "source": "dir",
        "tmpdir": "something",
        "vep": True
    }
    source_path = kipoi.get_source("dir").local_path
    kipoi_path = kipoi.get_source("kipoi").local_path
    for model in [["example/models/pyt"],
                  [
                      "example/models/shared/envs/kipoi-py3-keras1.2",
                      "example/models/pyt"
                  ]]:
        kwargs['model'] = model
        db_entry = generate_env_db_entry(get_args(kwargs)())
        # every CLI argument must be recorded verbatim in create_args
        assert all(
            [kwargs[k] == getattr(db_entry.create_args, k) for k in kwargs])

        # generate the reference output
        special_envs, only_models = split_models_special_envs(model)
        sub_models = []
        # renamed from `model` to avoid shadowing the outer loop variable
        for m in only_models:
            parsed_source, parsed_model = parse_source_name(
                kwargs["source"], m)
            sub_models.extend([
                os.path.join(source_path, e) for e in list_subcomponents(
                    parsed_model, parsed_source, "model")
            ])
        if len(special_envs) != 0:
            with open("example/models/shared/envs/models.yaml", "r") as fh:
                # safe_load: plain data only; yaml.load() without an explicit
                # Loader is deprecated (PyYAML>=5.1) and a TypeError in PyYAML 6
                special_env_models = yaml.safe_load(fh)
            for special_env in special_envs:
                # special envs always refer to the main kipoi source
                for model_group_name in special_env_models[os.path.basename(
                        special_env)]:
                    sub_models.extend([
                        os.path.join(kipoi_path, e)
                        for e in list_subcomponents(model_group_name, "kipoi",
                                                    "model")
                    ])
        assert set(db_entry.compatible_models) == set(sub_models)
        assert db_entry.cli_path is None
        assert db_entry.successful is False
        assert db_entry.kipoi_version == kipoi.__version__
        assert db_entry.timestamp < time.time()
def list_groups(group_name=None):
    """Group list view.

    Renders the model-group overview page for `group_name` (or the
    top-level groups when `group_name` is None), including the group's
    README.md rendered as markdown when one exists.
    """
    source = current_app.config['SOURCE']
    if group_name is None:
        group_name = ""
    group_name = group_name.rstrip('/')
    group_df = get_model_groups(source, group_name)
    group_list = group_df.to_dict(orient='records')
    # parse cite_as
    group_list = [update_cite_as_dict(x) for x in group_list]
    # update contributors
    group_list = [update_contributors_as_dict(x) for x in group_list]
    # update authors
    group_list = [update_authors_as_dict(x) for x in group_list]
    # get readme file
    readme_dir = os.path.join(
        kipoi.get_source(current_app.config['SOURCE']).local_path, group_name)
    try:
        # the filesystem may be case-sensitive, so locate "readme.md"
        # case-insensitively among the directory listing
        filelists = os.listdir(readme_dir)
        readmeindx = [x.lower() for x in filelists].index("readme.md")
        # context manager: the original open(...).read() leaked the handle
        with open(os.path.join(readme_dir, filelists[readmeindx]), "r") as fh:
            filecontent = fh.read()
        readmecontent = render_markdown(filecontent)
    except (IOError, ValueError):
        # IOError: directory/file missing; ValueError: no readme.md in listing
        readmecontent = ""
    return render_template("models/index_groups.html",
                           groups=group_list,
                           readmecontent=readmecontent)
def cli_get_example(command, raw_args):
    """Downloads the example files to the desired directory.

    CLI entry point for `kipoi get-example`: resolves the model's default
    dataloader and downloads its example files, then prints the dataloader
    kwargs pointing at the downloaded files as JSON.
    """
    assert command == "get-example"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Get example files')
    add_model(parser, source="kipoi")
    parser.add_argument("-o", "--output", default="example", required=False,
                        help="Output directory where to store the examples. Default: 'example'")
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    md = kipoi.get_model_descr(args.model, args.source)
    src = kipoi.get_source(args.source)

    # load the default dataloader
    if isinstance(md.default_dataloader, kipoi.specs.DataLoaderImport):
        # dataloader specified via an import spec -> resolve it relative
        # to the model directory
        with cd(src.get_model_dir(args.model)):
            dl_descr = md.default_dataloader.get()
    else:
        # load from directory
        # attach the default dataloader already to the model
        dl_descr = kipoi.get_dataloader_descr(
            os.path.join(args.model, md.default_dataloader),
            source=args.source)

    # download example files; returns the dataloader kwargs pointing at them
    kwargs = dl_descr.download_example(output_dir=args.output, dry_run=False)

    logger.info("Example files downloaded to: {}".format(args.output))
    logger.info("use the following dataloader kwargs:")
    print(json.dumps(kwargs))
def get_environments(source):
    """Return the parsed shared-environment definitions for *source*.

    Reads ``shared/envs/models.yaml`` from the source's local checkout.
    """
    import os
    from kipoi.utils import read_yaml
    yaml_path = os.path.join(
        kipoi.get_source(source).local_path, 'shared/envs/models.yaml')
    return read_yaml(yaml_path)
def test_list_models_group():
    """list_models_by_group returns the expected columns and contains CpGenie."""
    grouped = kipoi.get_source("kipoi").list_models_by_group()
    expected_columns = [
        "group", "N_models", "N_subgroups", "is_group", "authors",
        "contributors", "veff_score_variants", "type", "license", "cite_as",
        "tags"
    ]
    assert list(grouped.columns) == expected_columns
    assert len(grouped) > 0
    # exactly one top-level group named CpGenie
    assert grouped.group.str.contains("^CpGenie$").sum() == 1
def get_dataloader_descr(model_name, source):
    """Return the description of the model's default dataloader.

    Handles both forms of `default_dataloader`: a relative path string
    (resolved against the model name) and an import spec (resolved inside
    the model directory).
    """
    from kipoi.utils import cd
    src = kipoi.get_source(source)
    descr = kipoi.get_model_descr(model_name, source=source)
    if not isinstance(descr.default_dataloader, str):
        # import-spec dataloader; must be loaded from within the model dir
        with cd(src.get_model_dir(model_name)):
            return descr.default_dataloader.get()
    return kipoi.get_dataloader_descr(
        os.path.join(model_name, descr.default_dataloader), source=source)
def container_remote_url(model, source='kipoi'):
    """Look up the singularity container information for a model.

    # Arguments
        model (str): model name, e.g. "MMSplice/mtsplice" or "DeepSEA"
        source (str): model source (container mapping is stored per source)

    # Returns
        dict: container info for the model (exact match preferred, falling
            back to the model's top-level group), or {} when no container
            is registered
    """
    src = get_source(source)
    singularity_container_json = os.path.join(src.local_path,
                                              CONTAINER_PREFIX,
                                              "model-to-singularity.json")
    with open(singularity_container_json, 'r') as fh:
        model_to_singularity_container_dict = json.load(fh)
    if model in model_to_singularity_container_dict:
        # Exact match such as MMSplice/mtsplice and APARENT/veff, Basset
        return model_to_singularity_container_dict[model]
    # fall back to the top-level group; hoisted so split runs only once
    model_group = model.split('/')[0]
    if model_group in model_to_singularity_container_dict:
        return model_to_singularity_container_dict[model_group]
    return {}
def generate_env_db_entry(args, args_env_overload=None):
    """Create an EnvDbEntry for the environment described by *args*.

    # Arguments
        args: argparse-like namespace; at least `model` (list of model
            names and/or special-env paths) and `source` are used, and
            all arguments are recorded in the entry's create_args
        args_env_overload (str): if given, override the recorded
            environment name in create_args

    # Returns
        EnvDbEntry: entry listing all compatible models as absolute local
            paths, together with conda/kipoi versions and a timestamp
    """
    from collections import OrderedDict
    from kipoi.conda.env_db import EnvDbEntry
    from kipoi.conda import get_conda_version

    special_envs, only_models = split_models_special_envs(args.model)
    sub_models = []
    # resolve every plain model (or model-group prefix) to its sub-models,
    # stored as absolute local paths
    for model in only_models:
        parsed_source, parsed_model = parse_source_name(args.source, model)
        source_path = kipoi.get_source(parsed_source).local_path
        models = list_subcomponents(parsed_model, parsed_source, "model")
        sub_models.extend([os.path.join(source_path, m) for m in models])

    if len(special_envs) != 0:
        # for the special envs load the corresponding models:
        for special_env in special_envs:
            special_env_folder = "/".join(
                special_env.rstrip("/").split("/")[:-1])
            source_path = kipoi.get_source(args.source).local_path
            with open(
                    os.path.join(source_path, special_env_folder,
                                 "models.yaml"), "r") as fh:
                # safe_load: models.yaml is plain data; yaml.load() without
                # an explicit Loader is deprecated (PyYAML>=5.1) and a
                # TypeError in PyYAML 6
                special_env_models = yaml.safe_load(fh)
            # extend sub_models by all submodels covered by the handcrafted
            # environments (special_envs).
            # Those models **always** refer to the kipoi source
            for model_group_name in special_env_models[os.path.basename(
                    special_env)]:
                source_path = kipoi.get_source("kipoi").local_path
                models = list_subcomponents(model_group_name, "kipoi",
                                            "model")
                sub_models.extend(
                    [os.path.join(source_path, m) for m in models])

    entry = EnvDbEntry(conda_version=get_conda_version(),
                       kipoi_version=kipoi.__version__,
                       timestamp=time.time(),
                       compatible_models=sub_models,
                       create_args=OrderedDict(args._get_kwargs()))
    if args_env_overload is not None:
        entry.create_args.env = args_env_overload
    return entry
def test_list_softlink_dependencies():
    """Test if finding model dependencies works.

    NOTE(review): a second test with this exact name is defined later in
    this file and shadows this one under pytest collection — confirm which
    definition is intended to run.
    """
    component_dir = kipoi.get_source("kipoi").local_path
    deps = list_softlink_dependencies(os.path.join(component_dir, 'HAL'),
                                      component_dir)
    # one of these two, depending on the model source
    assert (deps == {'MaxEntScan'}) or (deps == {
        'MaxEntScan/template', 'MaxEntScan/template/example_files',
        'labranchor/example_files'
    })
    # deepTarget has no softlinked dependencies
    assert list_softlink_dependencies(
        os.path.join(component_dir, 'deepTarget'), component_dir) == set()
def install_model_requirements(model, source="kipoi", and_dataloaders=True):
    """Install model dependencies

    # Arguments
        model (str): model name
        source (str): model source
        and_dataloaders (bool): if True, also install the dependencies
            of the model's default dataloader
    """
    md = kipoi.get_source(source).get_model_descr(model)
    md.dependencies.install()
    if and_dataloaders:
        # "source:path" syntax allows the dataloader to live in another source
        if ":" in md.default_dataloader:
            dl_source, dl_path = md.default_dataloader.split(":")
        else:
            dl_source = source
            dl_path = md.default_dataloader

        # normalize the (possibly relative) dataloader path against the
        # model name; the leading "/" + [1:] trick collapses ".." segments
        default_dataloader_path = os.path.join("/" + model, dl_path)[1:]
        dl = kipoi.config.get_source(dl_source).get_dataloader_descr(
            default_dataloader_path)
        dl.dependencies.install()
def all_urls():
    """Enumerate all site URLs: root, group pages and model pages."""
    df = kipoi.get_source("kipoi").list_models()
    models = df.model
    # collect each model path together with all of its ancestor directories
    urls = set()
    for name in models:
        path = name
        while path:
            urls.add(path)
            path = os.path.dirname(path)
    # groups are the paths rendered by the group_list view,
    # excluding the final models themselves
    groups = {u for u in urls if get_view(u, df)[0] == "group_list"}
    groups = groups - set(models)
    group_urls = ["/groups/{0}/".format(g) for g in groups]
    model_urls = ["/models/{0}/".format(u) for u in urls]
    return ["/", "/groups/"] + group_urls + model_urls
def get_envs_by_model(models, source, only_most_recent=True, only_valid=False):
    """Look up environment db entries for the given models.

    Models are keyed in the db by their absolute local path; missing
    (None) results are dropped from the returned list.
    """
    local_path = kipoi.get_source(source).local_path
    db = get_model_env_db()
    collected = []
    for model in models:
        hits = db.get_entry_by_model(os.path.join(local_path, model),
                                     only_most_recent=only_most_recent,
                                     only_valid=only_valid)
        if only_most_recent:
            # single entry (or None) per model
            collected.append(hits)
        else:
            # list of entries per model
            collected.extend(hits)
    return [entry for entry in collected if entry is not None]
def list_subcomponents(component, source, which="model"):
    """List all available subcomponents under a component name.

    Args:
        component: component name or a prefix thereof: e.g. instead of
            Model1/CTCF we can give Model1 and then all the sub-models
            would be included
        source: model source
        which: component type to list: "model" or "dataloader"

    Returns:
        list of component names (excluding "/template" entries)
    """
    src = kipoi.get_source(source)
    if src._is_component(component, which):
        # exact component -> single-element list
        return [component]
    else:
        # prefix match, skipping template placeholders
        return [
            x for x in src._list_components(which)
            if x.startswith(component) and "/template" not in x
        ]
def cli_ls(command, raw_args):
    """List all kipoi models.

    CLI entry point for `kipoi ls`: prints the models of a source,
    optionally restricted to a model group and/or in tsv format.
    """
    assert command == "ls"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Lists available models")
    parser.add_argument("group_filter", nargs='?', default='',
                        help="A relative path to the model group used to subset the model list. Use 'all' to show all models")
    parser.add_argument("--tsv", action='store_true',
                        help="Print the output in the tsv format.")
    add_source(parser)
    args = parser.parse_args(raw_args)
    grp = kipoi.get_source(args.source)
    df = grp.list_models()
    # ls_helper does the filtering and printing
    ls_helper(df, args.group_filter, args.tsv)
def test_list_softlink_dependencies():
    """Test if finding model dependencies works.

    NOTE(review): an earlier test with this exact name exists in this
    file; this later definition shadows it — confirm that is intended.
    """
    component_dir = kipoi.get_source("kipoi").local_path
    # rbp_eclip models share a softlinked template directory
    assert list_softlink_dependencies(
        os.path.join(component_dir, 'rbp_eclip/UPF1'),
        component_dir) == {'rbp_eclip/template'}
    assert list_softlink_dependencies(os.path.join(component_dir, 'HAL'),
                                      component_dir) == {
                                          'MaxEntScan/template',
                                          'MaxEntScan/template/example_files',
                                          'labranchor/example_files'
                                      }
    # deepTarget has no softlinked dependencies
    assert list_softlink_dependencies(
        os.path.join(component_dir, 'deepTarget'), component_dir) == set()
def get_envs_by_model(models, source, only_most_recent=True, only_valid=False):
    """Get the environment db entries compatible with the given models.

    # Arguments
        models (str or list of str): model name(s)
        source (str): model source
        only_most_recent (bool): if True, keep only the most recent
            entry per model
        only_valid (bool): if True, keep only entries whose environment
            is still usable

    # Returns
        list of EnvDbEntry: matching entries (None results dropped)
    """
    if isinstance(models, str):
        models = [models]
    entries = []
    db = env_db.get_model_env_db()
    for m in models:
        # _env_db_model_name resolves the db key (plain name for the
        # "kipoi" source, absolute local path otherwise); the original
        # also computed an unused `source_path` here — removed
        res = db.get_entry_by_model(_env_db_model_name(source, m),
                                    only_most_recent=only_most_recent,
                                    only_valid=only_valid)
        if only_most_recent:
            entries.append(res)
        else:
            entries.extend(res)
    entries = [e for e in entries if e is not None]
    return entries
def test_env_db_kipoi(tmpdir, monkeypatch):
    # Test the kipoi vs. dir path ambiguation
    # Test the DeepSEA model using the `kipoi` and the `dir` sources
    # Test the `shared/envs/kipoi-py3-keras1.2.yaml` model using the `kipoi` and the `dir` sources
    json_file = os.path.join(str(tmpdir), "db.json")
    sample_cli_path = os.path.join(str(tmpdir), "sample")
    with open(sample_cli_path, "w") as fh:
        fh.write("")
    db = EnvDb(json_file)
    kwargs = {"dataloader": [], "gpu": True, "model": None, "source": "kipoi",
              "tmpdir": "something", "vep": True}

    # generate the kipoi entries (models addressed by plain name)
    kipoi_entries = []
    for model in [["DeepSEA"], ["shared/envs/kipoi-py3-keras1.2"]]:
        kwargs['model'] = model
        db_entry = generate_env_db_entry(get_args(kwargs)())
        db.append(db_entry)
        kipoi_entries.append(db_entry)

    # generate the dir entries (models addressed by absolute local path)
    dir_entries = []
    local_path = kipoi.get_source("dir").local_path
    kwargs["source"] = "dir"
    for model in [["example/models/pyt"],
                  ["example/models/shared/envs/kipoi-py3-keras1.2"]]:
        kwargs['model'] = [os.path.join(local_path, model[0])]
        db_entry = generate_env_db_entry(get_args(kwargs)())
        db.append(db_entry)
        dir_entries.append(db_entry)

    # make sure there is no mixup between the kipoi and dir models and make
    # sure the full path is only used for dir models
    assert db.get_entry_by_model(
        "DeepSEA", only_most_recent=False) == [kipoi_entries[0]]
    assert db.get_entry_by_model(
        "CpGenie/merged",
        only_most_recent=False) == [dir_entries[1], kipoi_entries[1]]
    assert db.get_entry_by_model(
        os.path.join(local_path, "example/models/pyt"),
        only_most_recent=False) == [dir_entries[0]]

    # monkeypatch the get_model_env_db() so get_envs_by_model uses our db
    monkeypatch.setattr(kipoi.conda.env_db, 'get_model_env_db', lambda: db)
    assert get_envs_by_model(
        ['DeepSEA'], "kipoi", only_most_recent=False,
        only_valid=False) == [kipoi_entries[0]]
    assert get_envs_by_model(
        ["CpGenie/merged"], "kipoi", only_most_recent=False,
        only_valid=False) == [dir_entries[1], kipoi_entries[1]]
    assert get_envs_by_model(
        ["example/models/pyt"], "dir", only_most_recent=False,
        only_valid=False) == [dir_entries[0]]
def get_dataloader_descr(model_name, source='kipoi'):
    """Not yet nicely integrated with Kipoi

    Args:
        model_name: model name as a string
        source: model source name

    Returns:
        (model output schema, list of required files)
    """
    # dataloader args that should not be surfaced for known dataloaders
    dl_skip_arguments = {
        "kipoiseq.dataloaders.SeqIntervalDl":
        ['alphabet_axis', 'dummy_axis', 'alphabet', 'dtype']
    }
    md = kipoi.get_model_descr(model_name)
    src = kipoi.get_source(source)

    # get dataloader
    if isinstance(md.default_dataloader, str):
        # dataloader given as a relative path next to the model
        dataloader = kipoi.get_dataloader_descr(
            os.path.join(model_name, md.default_dataloader), source=source)
        dataloader_name = md.default_dataloader
        dataloader_args = dataloader.args
    else:
        # dataloader given as an import spec -> load it inside the model dir
        with cd(src.get_model_dir(model_name)):
            dataloader = md.default_dataloader.get()
        dataloader_name = md.default_dataloader.defined_as
        # drop args that have defaults or are in the skip list
        # NOTE(review): dataloader_args is computed but not used below —
        # presumably consumed in an earlier version; verify against callers
        dataloader_args = OrderedDict([
            (k, v) for k, v in dataloader.args.items()
            if k not in list(md.default_dataloader.default_args) +
            dl_skip_arguments.get(dataloader_name, [])
        ])

        if md.default_dataloader.defined_as == 'kipoiseq.dataloaders.SeqIntervalDl':
            # HACK - cleanup some values for SeqIntervalDl
            if md.default_dataloader.default_args.get("ignore_targets",
                                                      False):
                dataloader_args.pop('label_dtype', None)

    # files the user has to provide for this dataloader
    required_files = []
    if 'fasta_file' in dataloader.args:
        required_files.append("fasta_file")
    if 'gtf_file' in dataloader.args:
        required_files.append("gtf_file")

    return get_output_schema(md.schema.targets), required_files
def cli_info(command, raw_args):
    """CLI interface for `kipoi info`.

    Prints the model information together with the dataloader keyword
    arguments of the model's default dataloader.
    """
    assert command == "info"
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description="Prints dataloader" +
                                     " keyword arguments.")
    add_model(parser)
    add_dataloader(parser, with_args=False)
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    # load model & dataloader
    md = kipoi.get_model_descr(args.model, args.source)
    src = kipoi.get_source(args.source)

    # load the default dataloader
    try:
        if isinstance(md.default_dataloader, kipoi.specs.DataLoaderImport):
            # import-spec dataloader -> resolve inside the model directory
            with cd(src.get_model_dir(args.model)):
                dl_descr = md.default_dataloader.get()
        else:
            # load from directory
            # attach the default dataloader already to the model
            dl_descr = kipoi.get_dataloader_descr(
                os.path.join(args.model, md.default_dataloader),
                source=args.source)
    # if kipoiseq is not installed you get an ImportError
    except ImportError:
        dl_descr = None

    print("-" * 80)
    print("'{0}' from source '{1}'".format(str(args.model), str(args.source)))
    print("")
    print("Model information")
    print("-----------")
    print(md.info.get_config_as_yaml())

    if dl_descr:
        print("Dataloader arguments")
        print("--------------------")
        dl_descr.print_args()
        print("--------------------\n")
    print("Run `kipoi get-example {} -o example` to download example files.\n".
          format(args.model))
def install_model_requirements(model, source="kipoi", and_dataloaders=True):
    """Install model dependencies

    # Arguments
        model (str): model name
        source (str): model source
        and_dataloaders (bool): if True, install also the dependencies
            of the model's default dataloader
    """
    descr = kipoi.get_source(source).get_model_descr(model)
    descr.dependencies.install()
    if not and_dataloaders:
        return
    # "source:path" syntax lets the dataloader live in another source
    if ":" in descr.default_dataloader:
        dl_source, dl_path = descr.default_dataloader.split(":")
    else:
        dl_source, dl_path = source, descr.default_dataloader
    # normalize the (possibly relative) dataloader path against the model
    # name; the leading "/" + [1:] trick collapses ".." segments
    default_dataloader_path = os.path.join("/" + model, dl_path)[1:]
    dataloader = kipoi.config.get_source(dl_source).get_dataloader_descr(
        default_dataloader_path)
    dataloader.dependencies.install()
def _env_db_model_name(source, model): ret = model if source != "kipoi": source_path = kipoi.get_source(source).local_path ret = os.path.join(source_path, model) return ret
def merge_deps(models,
               dataloaders=None,
               source="kipoi",
               vep=False,
               interpret=False,
               gpu=False):
    """Setup the dependencies.

    Merges the dependencies of all given models (including sub-models of
    model-group prefixes and handcrafted "special" environments), of the
    given (or default) dataloaders, plus the kipoi/vep/interpret/gpu/osx
    extras as requested. Returns the merged Dependencies object.
    """
    special_envs, only_models = split_models_special_envs(models)
    deps = Dependencies()
    # Treat the handcrafted environments differently
    for special_env in special_envs:
        from related import from_yaml
        logger.info("Loading environment definition: {0}".format(special_env))
        # Load and merge the handcrafted deps.
        yaml_path = os.path.join(
            kipoi.get_source(source).local_path, special_env + ".yaml")
        if not os.path.exists(yaml_path):
            raise ValueError(
                "Environment definition file {0} not found in source {1}".
                format(yaml_path, source))
        with open(yaml_path, "r", encoding="utf-8") as fh:
            special_env_deps = Dependencies.from_env_dict(from_yaml(fh))
        deps = deps.merge(special_env_deps)

    for model in only_models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)

        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(
                sub_model)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case
            if dataloaders is None or not dataloaders:
                if isinstance(model_descr.default_dataloader,
                              DataLoaderImport):
                    # dataloader specified by the import
                    deps = deps.merge(
                        model_descr.default_dataloader.dependencies)

                    if model_descr.default_dataloader.parse_dependencies:
                        # add dependencies specified in the yaml file
                        # load from the dataloader description if you can
                        try:
                            with cd(model_dir):
                                dataloader_descr = model_descr.default_dataloader.get()
                            deps = deps.merge(dataloader_descr.dependencies)
                        except ImportError as e:
                            # package providing the dataloader is not installed yet
                            if model_descr.default_dataloader.defined_as.startswith(
                                    "kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                else:
                    # default dataloader given as a relative path next to the model
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from".format(
                        dataloader) + " the model.")
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)

    # NOTE(review): `is not None or dataloaders` behaves like `is not None`
    # (the loop is a no-op for an empty list) — probably meant `and`
    if dataloaders is not None or dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))
            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if interpret:
        # add vep dependencies
        logger.info("Adding the interpret dependencies")
        deps = INTERPRET_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()

    return deps
def cli_test(command, raw_args):
    """Runs test on the model.

    CLI entry point for `kipoi test`: runs the model on its example files
    and optionally compares the predictions against the expected values
    from `test.expect` in model.yaml (or a CLI-supplied hdf5 file).
    """
    assert command == "test"
    # setup the arg-parsing
    parser = argparse.ArgumentParser(
        'kipoi {}'.format(command),
        description='script to test model zoo submissions. Example usage:\n'
        '`kipoi test model/directory`, where `model/directory` is the '
        'path to a directory containing a model.yaml file.')
    add_model(parser, source="dir")
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Batch size to use in prediction')
    parser.add_argument("-o", "--output", default=None, required=False,
                        help="Output hdf5 file")
    parser.add_argument("-s", "--skip-expect", action='store_true',
                        help="Skip validating the expected predictions if test.expect field is specified under model.yaml")
    parser.add_argument("-e", "--expect", default=None,
                        help="File path to the hdf5 file of predictions produced by kipoi test -o file.h5 "
                        "or kipoi predict -o file.h5 --keep_inputs. Overrides test.expect in model.yaml")
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    mh = kipoi.get_model(args.model, args.source)

    if not mh._sufficient_deps(mh.dependencies):
        # model requirements should be installed
        logger.warning(
            "Required package '{0}' for model type: {1} is not listed in the dependencies"
            .format(mh.MODEL_PACKAGE, mh.type))

    # Load the test files from model source
    mh.pipeline.predict_example(batch_size=args.batch_size,
                                output_file=args.output)

    # validate against expected predictions only when not writing output
    # and not explicitly skipped
    if (mh.test.expect is not None or args.expect is not None) \
            and not args.skip_expect and args.output is None:
        if args.expect is not None:
            # `expect` specified from the CLI
            expect = args.expect
        else:
            # `expect` taken from model.yaml
            if isinstance(mh.test.expect, kipoi.specs.RemoteFile):
                # download the file
                output_dir = kipoi.get_source(
                    args.source).get_model_download_dir(args.model)
                makedir_exist_ok(output_dir)
                mh.test.expect = mh.test.expect.get_file(
                    os.path.join(output_dir, 'test.expect.h5'))
            expect = mh.test.expect
        logger.info(
            'Testing if the predictions match the expected ones in the file: {}'
            .format(expect))
        logger.info(
            'Desired precision (number of matching decimal places): {}'.
            format(mh.test.precision_decimal))

        # iteratively load the expected file
        expected = kipoi.readers.HDF5Reader(expect)
        expected.open()
        it = expected.batch_iter(batch_size=args.batch_size)
        for i, batch in enumerate(
                tqdm(it, total=len(expected) // args.batch_size)):
            if i == 0 and ('inputs' not in batch or 'preds' not in batch):
                raise ValueError(
                    "test.expect file requires 'inputs' and 'preds' "
                    "to be specified. Available keys: {}".format(
                        list(expected)))
            pred_batch = mh.predict_on_batch(batch['inputs'])
            # compare to the predictions
            try:
                compare_numpy_dict(pred_batch,
                                   batch['preds'],
                                   exact=False,
                                   decimal=mh.test.precision_decimal)
            except Exception as e:
                logger.error(
                    "Model predictions don't match the expected predictions."
                    "expected: {}\nobserved: {}. Exception: {}".format(
                        batch['preds'], pred_batch, e))
                expected.close()
                sys.exit(1)

        expected.close()
        logger.info('All predictions match')
    logger.info('Successfully ran test_predict')
def merge_deps(models, dataloaders=None, source="kipoi", vep=False, gpu=False):
    """Setup the dependencies.

    # Arguments
        models (list of str): model names or model-group prefixes
        dataloaders (list of str or None): dataloader names; when None or
            empty, each model's default dataloader is used instead
        source (str): model source name
        vep (bool): if True, add the variant-effect-prediction dependencies
        gpu (bool): if True, use the gpu-compatible dependencies

    # Returns
        Dependencies: merged dependency specification (osx-adjusted on darwin)
    """
    deps = Dependencies()
    for model in models:
        logger.info("Loading model: {0} description".format(model))
        parsed_source, parsed_model = parse_source_name(source, model)

        sub_models = list_subcomponents(parsed_model, parsed_source, "model")
        if len(sub_models) == 0:
            raise ValueError("Model {0} not found in source {1}".format(
                parsed_model, parsed_source))
        if len(sub_models) > 1:
            logger.info(
                "Found {0} models under the model name: {1}. Merging dependencies for all"
                .format(len(sub_models), parsed_model))

        for sub_model in sub_models:
            model_descr = kipoi.get_model_descr(sub_model, parsed_source)
            model_dir = kipoi.get_source(parsed_source).get_model_dir(
                sub_model)
            deps = deps.merge(model_descr.dependencies)

            # handle the dataloader=None case
            if dataloaders is None or not dataloaders:
                if isinstance(model_descr.default_dataloader,
                              DataLoaderImport):
                    # dataloader specified by the import
                    deps = deps.merge(
                        model_descr.default_dataloader.dependencies)

                    if model_descr.default_dataloader.parse_dependencies:
                        # add dependencies specified in the yaml file
                        # load from the dataloader description if you can
                        try:
                            with cd(model_dir):
                                dataloader_descr = model_descr.default_dataloader.get()
                            deps = deps.merge(dataloader_descr.dependencies)
                        except ImportError:
                            # package providing the dataloader is not installed yet
                            if model_descr.default_dataloader.defined_as.startswith(
                                    "kipoiseq."):
                                logger.info(
                                    "kipoiseq not installed. Using default kipoiseq dependencies for the dataloader: {}"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                                deps = deps.merge(KIPOISEQ_DEPS)
                            else:
                                # logger.warn is deprecated -> logger.warning
                                logger.warning(
                                    "Unable to extract dataloader description. "
                                    "Make sure the package containing the dataloader `{}` is installed"
                                    .format(model_descr.default_dataloader.
                                            defined_as))
                else:
                    # default dataloader given as a relative path next to the model
                    dataloader = os.path.normpath(
                        os.path.join(sub_model,
                                     str(model_descr.default_dataloader)))
                    logger.info("Inferred dataloader name: {0} from".format(
                        dataloader) + " the model.")
                    dataloader_descr = kipoi.get_dataloader_descr(
                        dataloader, parsed_source)
                    deps = deps.merge(dataloader_descr.dependencies)

    # `is not None or dataloaders` behaves like `is not None`
    # (the loop below is a no-op for an empty list)
    if dataloaders is not None or dataloaders:
        for dataloader in dataloaders:
            parsed_source, parsed_dataloader = parse_source_name(
                source, dataloader)
            sub_dataloaders = list_subcomponents(parsed_dataloader,
                                                 parsed_source, "dataloader")
            if len(sub_dataloaders) == 0:
                raise ValueError(
                    "Dataloader: {0} not found in source {1}".format(
                        parsed_dataloader, parsed_source))
            if len(sub_dataloaders) > 1:
                logger.info(
                    "Found {0} dataloaders under the dataloader name: {1}. Merging dependencies for all"
                    .format(len(sub_dataloaders), parsed_dataloader))
            for sub_dataloader in sub_dataloaders:
                dataloader_descr = kipoi.get_dataloader_descr(
                    sub_dataloader, parsed_source)
                deps = deps.merge(dataloader_descr.dependencies)

    # add Kipoi to the dependencies
    deps = KIPOI_DEPS.merge(deps)

    if vep:
        # add vep dependencies
        logger.info("Adding the vep dependencies")
        deps = VEP_DEPS.merge(deps)

    if gpu:
        logger.info("Using gpu-compatible dependencies")
        deps = deps.gpu()

    if platform == "darwin":
        logger.info("Using osx-type dependencies")
        deps = deps.osx()

    return deps
def test_env_db(tmpdir):
    """End-to-end test of the EnvDb: append, query, validity check,
    persistence, and skipping of malformed entries."""
    json_file = os.path.join(str(tmpdir), "db.json")
    sample_cli_path = os.path.join(str(tmpdir), "sample")
    with open(sample_cli_path, "w") as fh:
        fh.write("")
    db = EnvDb(json_file)
    kwargs = {"dataloader": [], "env": "test_env", "gpu": True, "model": None,
              "source": "dir", "tmpdir": "something", "vep": True}
    entries = []
    source_path = kipoi.get_source("dir").local_path
    for model in [["example/models/pyt"],
                  [
                      "example/models/shared/envs/kipoi-py3-keras1.2",
                      "example/models/pyt"
                  ]]:
        kwargs['model'] = model
        db_entry = generate_env_db_entry(get_args(kwargs)())
        db.append(db_entry)
        entries.append(db_entry)

    # dir-source models are keyed by absolute path
    pyt_query_name = os.path.join(source_path, "example/models/pyt")
    assert db.get_entry_by_model(pyt_query_name) == entries[1]
    assert db.get_entry_by_model(pyt_query_name + "_class") is None
    # all entries, most recent first
    assert db.get_entry_by_model(pyt_query_name,
                                 only_most_recent=False) == entries[::-1]

    # test if the viability check is ok:
    # valid == successful AND cli_path exists
    entry = db.get_entry_by_model(pyt_query_name)
    entry.successful = True
    entry.cli_path = sample_cli_path
    assert db.get_entry_by_model(pyt_query_name,
                                 only_most_recent=False,
                                 only_valid=True) == [entry]
    entry.successful = False
    assert len(
        db.get_entry_by_model(pyt_query_name,
                              only_most_recent=False,
                              only_valid=True)) == 0
    entry.successful = True
    entry.cli_path = None
    assert len(
        db.get_entry_by_model(pyt_query_name,
                              only_most_recent=False,
                              only_valid=True)) == 0

    db.save()
    del db

    # Test if loading is fine
    db2 = EnvDb(json_file)
    # test dict identity
    assert_rec(db2.get_entry_by_model(pyt_query_name).get_config(),
               entries[1].get_config())
    assert db2.get_entry_by_model(pyt_query_name + "_class") is None
    del db2

    # Test if bad entries are skipped
    with open(json_file, "r") as fh:
        db_dict = json.load(fh)

    # Add a bad entry:
    new_key = max([int(k) for k in db_dict["_default"]]) + 1
    db_dict["_default"][str(new_key)] = {"conda_version": "conda 4.5.4",
                                         "kipoi_version": "0.5.6"}

    with open(json_file, "w") as fh:
        json.dump(db_dict, fh)

    # Check if there is a warning
    # with pytest.warns(UserWarning):  # There seems to be a general problem with warnings...
    db_warns = EnvDb(json_file)
    # the malformed entry is skipped on load
    assert len(db_warns.entries) == 2

    # Now save so that the bad entry is be gone
    db_warns.save()
    del db_warns

    # Make sure the bad entry is not there anymore
    with open(json_file, "r") as fh:
        db_dict_recovered = json.load(fh)

    found = 0
    for val in db_dict_recovered['_default'].values():
        found += int(val == db_dict["_default"][str(new_key)])

    assert len(db_dict_recovered["_default"]) == new_key - 1
    assert found == 0
    os.unlink(json_file)
def cli_test_source(command, raw_args):
    """Test all (or a git-modified subset of) models in a model source.

    Entry point for ``kipoi test-source``: selects models (optionally
    filtered with -k, restricted via --git-range, or sharded with
    --shard_id/--num_of_shards) and tests each one either in a dedicated
    conda env, a shared "common" env, or a singularity container.

    # Arguments
        command (str): must be "test-source"
        raw_args (list): CLI arguments after the sub-command
    """
    assert command == "test-source"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Test models in model source')
    parser.add_argument('source', default="kipoi",
                        help='Which source to test')
    parser.add_argument('--git-range', nargs='+',
                        help='''Git range (e.g. commits or something like "master HEAD" to check commits in HEAD vs master, or just "HEAD" to include uncommitted changes). All models modified within this range will be tested.''')
    parser.add_argument('-n', '--dry_run', action='store_true',
                        help='Dont run model testing')
    parser.add_argument('-b', '--batch_size', default=4, type=int,
                        help='Batch size')
    parser.add_argument('-x', '--exitfirst', action='store_true',
                        help='exit instantly on first error or failed test.')
    parser.add_argument(
        '-k', default=None,
        help='only run tests which match the given substring expression')
    parser.add_argument('-c', '--clean_env', action='store_true',
                        help='clean the environment after running.')
    parser.add_argument(
        "--vep", action=kipoi.cli.main.DeprecateAction,
        help="This argument is deprecated. Please use https://github.com/kipoi/kipoi-veff2 directly")
    parser.add_argument('--common_env', action='store_true',
                        help='Test models in common environments.')
    parser.add_argument('--all', action='store_true',
                        help="Test all models in the source")
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help="Increase output verbosity. Show conda stdout during env installation.")
    parser.add_argument('--shard_id', type=int, default=-1, help="Shard id")
    parser.add_argument('--num_of_shards', type=int, default=-1,
                        help="Number of shards")
    parser.add_argument('--singularity', action='store_true',
                        help='Test models within their singularity containers')
    args = parser.parse_args(raw_args)
    if args.singularity and args.source != "kipoi":
        raise IOError(
            "Singularity containers are available for kipoi models only")
    if args.singularity and args.common_env:
        raise IOError("Please use only one of --singularity and --common_env")
    # --------------------------------------------
    source = kipoi.get_source(args.source)
    all_models = all_models_to_test(source)
    # NOTE(review): -k is documented as a substring match but re.match anchors
    # at the start of the model name -- confirm intended semantics.
    if args.k is not None:
        all_models = [x for x in all_models if re.match(args.k, x)]
    if len(all_models) == 0:
        logger.info("No models found in the source")
        sys.exit(1)
    if args.all:
        test_models = all_models
        logger.info('Testing all models:\n- {0}'.format(
            '\n- '.join(test_models)))
    else:
        test_models = restrict_models_to_test(all_models, source,
                                              args.git_range)
        if len(test_models) == 0:
            logger.info("No model modified according to git, exiting.")
            sys.exit(0)
        logger.info('{0}/{1} models modified according to git:\n- {2}'.format(
            len(test_models), len(all_models), '\n- '.join(test_models)))
    # Sort the models alphabetically
    test_models = sorted(test_models)
    if args.num_of_shards > 0 and args.shard_id >= 0:
        if args.shard_id >= args.num_of_shards:
            logger.info(
                "Shard id is invalid. It should be a value between 0 and {0}.".
                format(args.num_of_shards - 1))
            sys.exit(1)
        else:
            # Deterministic sharding: split the sorted list into
            # num_of_shards near-equal sub-lists and keep shard_id's part.
            all_test_models = test_models
            sublists = np.array_split(all_test_models, args.num_of_shards)
            list_of_shards = [list(split) for split in sublists]
            test_models = list_of_shards[args.shard_id]
            logger.info(test_models)
    # Parse the repo config
    cfg_path = get_file_path(source.local_path, "config",
                             extensions=[".yml", ".yaml"],
                             raise_err=False)
    if cfg_path is not None:
        cfg = kipoi.specs.SourceConfig.load(cfg_path, append_path=False)
        logger.info("Found config {0}:\n{1}".format(cfg_path, cfg))
    else:
        cfg = None
    if args.dry_run:
        logger.info(
            "-n/--dry_run enabled. Skipping model testing and exiting.")
        sys.exit(0)
    # TODO - make sure the modes are always tested in the same order?
    #      - make sure the keras config doesn't get cluttered
    # Test common environments
    if args.common_env:
        logger.info("Installing common environmnets")
        import yaml
        models_yaml_path = os.path.join(source.local_path, SPECIAL_ENV_PREFIX,
                                        "models.yaml")
        if not os.path.exists(models_yaml_path):
            logger.error(
                "{} doesn't exists when installing the common environment".
                format(models_yaml_path))
            sys.exit(1)
        # FIX: use the already-computed path and a context manager instead of
        # re-joining the path and leaking the file handle.
        with open(models_yaml_path, "r", encoding="utf-8") as fh:
            model_envs = yaml.safe_load(fh)
        test_envs = {
            get_common_env(m, model_envs)
            for m in test_models if get_common_env(m, model_envs) is not None
        }
        if len(test_envs) == 0:
            logger.info("No common environments to test")
            sys.exit(0)
        logger.info(
            "Instaling environments covering the following models: \n{}".
            format(yaml.dump(model_envs)))
        for env in test_envs:
            if env_exists(env):
                logger.info(
                    "Common environment already exists: {}. Skipping the installation"
                    .format(env))
            else:
                logger.info("Installing environment: {}".format(env))
                create_model_env(os.path.join(SPECIAL_ENV_PREFIX, env),
                                 args.source, env)
    logger.info("Running {0} tests..".format(len(test_models)))
    failed_models = []
    for i in range(len(test_models)):
        m = test_models[i]
        print('-' * 20)
        print("{0}/{1} - model: {2}".format(i + 1, len(test_models), m))
        print('-' * 20)
        # FIX: initialize env_name per model. Previously, --singularity
        # combined with --clean_env raised NameError in the finally block
        # because env_name was never assigned in the singularity branch.
        env_name = None
        try:
            if not args.common_env and not args.singularity:
                # Prepend "test-" to the standard kipoi env name
                env_name = conda_env_name(m, source=args.source)
                env_name = "test-" + env_name
                # Test
                test_model(m, args.source, env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           create_env=True, verbose=args.verbose)
            elif args.singularity and not args.common_env:
                print("Testing within singularity container....")
                test_model_singularity(m, args.source,
                                       get_batch_size(cfg, m, args.batch_size),
                                       verbose=args.verbose)
            elif args.common_env and not args.singularity:
                # figure out the common environment name
                env_name = get_common_env(m, model_envs)
                if env_name is None:
                    # skip if none was found
                    logger.info(
                        "Common environment not found for {}".format(m))
                    continue
                # ---------------------------
                # Test
                print("test_model...")
                test_model(m, args.source, env_name,
                           get_batch_size(cfg, m, args.batch_size),
                           create_env=False, verbose=args.verbose)
            else:
                raise IOError(
                    "Please either choose --common_env or --singularity or none")
        except Exception as e:
            logger.error("Model {0} failed: {1}".format(m, e))
            failed_models += [m]
            if args.exitfirst:
                # FIX: no rm_env here -- sys.exit raises SystemExit, so the
                # finally block below already performs the cleanup exactly
                # once (previously the env was removed twice).
                sys.exit(1)
        finally:
            # Common envs are shared across models, so they are never removed.
            if args.clean_env and not args.common_env and env_name is not None:
                rm_env(env_name)
    print('-' * 40)
    if failed_models:
        logger.error("{0}/{1} tests failed for models:\n- {2}".format(
            len(failed_models), len(test_models), "\n- ".join(failed_models)))
        sys.exit(1)
    logger.info('All tests ({0}) passed'.format(len(test_models)))
def cli_create(cmd, raw_args):
    """Create a conda environment for a model.

    With ``--model all``, iterates over all model groups and recursively
    calls itself once per group (reusing a common environment where one is
    declared in models.yaml); otherwise exports an environment file into a
    temp directory, records the attempt in the env DB, and creates the
    conda environment from it.

    # Arguments
        cmd (str): sub-command name (used only for the help text)
        raw_args (list): CLI arguments after the sub-command
    """
    from kipoi_conda import get_kipoi_bin
    import uuid
    parser = argparse.ArgumentParser(
        'kipoi env {}'.format(cmd),
        description='Create a conda environment for a specific model.')
    add_env_args(parser)
    parser.add_argument(
        '-e', '--env', default=None,
        help="Special environment name. default: kipoi-<model>[-<dataloader>]")
    parser.add_argument('--dry-run', action='store_true',
                        help="Don't actually create the environment")
    parser.add_argument(
        '-t', '--tmpdir', default=None,
        help=("Temporary directory path where to create the conda environment file"
              "Defaults to /tmp/kipoi/envfiles/<uuid>/"))
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="Increase output verbosity. Show conda stdout.")
    args = parser.parse_args(raw_args)
    # create the tmp dir
    if args.tmpdir is None:
        tmpdir = "/tmp/kipoi/envfiles/" + str(uuid.uuid4())[:8]
    else:
        tmpdir = args.tmpdir
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)
    # write the env file
    logger.info("Writing environment file: {0}".format(tmpdir))
    if args.model == ['all']:
        from kipoi.cli.source_test import get_common_env
        src = kipoi.get_source(args.source)
        # FIX: close the yaml file handle instead of leaking it
        models_yaml_path = os.path.join(src.local_path, SPECIAL_ENV_PREFIX,
                                        "models.yaml")
        with open(models_yaml_path) as fh:
            model_envs = yaml.safe_load(fh)
        # TODO - test this by mocking up the CLI command execution
        # setup the args for all the models
        df = kipoi.list_models()
        dfg = list_models_by_group(df, "")
        for model_group in dfg.group.unique().tolist():
            existing_envs = get_envs_by_model(model_group, args.source,
                                              only_valid=True)
            # FIX: the previous condition `existing_envs or existing_envs is
            # None` crashed with a TypeError on the None case because it then
            # iterated over existing_envs. Handle None separately.
            if existing_envs is None:
                # env DB lookup yielded nothing usable -- skip this group
                logger.info(
                    "Could not determine environments for {}. Skipping installation"
                    .format(model_group))
                continue
            if existing_envs:
                # No need to create the environment
                existing_envs_str = "\n".join(
                    [e.create_args.env for e in existing_envs])
                logger.info(
                    "Environment for {} already exists ({}). Skipping installation"
                    .format(model_group, existing_envs_str))
                continue
            logger.info(
                "Environment doesn't exists for model group {}. Installing it".
                format(model_group))
            # Figure out which <model> to use for installation
            common_env = get_common_env(model_group, model_envs)
            if common_env is not None:
                # common environment exists for the model. Use it
                logger.info("Using common environment: {}".format(common_env))
                model_group = os.path.join(SPECIAL_ENV_PREFIX, common_env)

            # Run cli_create recursively with 'all' replaced by the group
            def optional_replace(x, ref, alt):
                if x == ref:
                    return alt
                else:
                    return x

            new_raw_args = [
                optional_replace(x, 'all', model_group) for x in raw_args
                if x is not None
            ]
            cli_create(cmd, new_raw_args)
        logger.info("Done installing all environments!")
        return None
    env, env_file = export_env(args.model, args.dataloader, args.source,
                               env_file=None, env_dir=tmpdir, env=args.env,
                               vep=args.vep, interpret=args.interpret,
                               gpu=args.gpu)
    if not args.dry_run:
        # Record the attempt before creating the env so a failed creation
        # leaves an unsuccessful entry behind.
        env_db_entry = generate_env_db_entry(args, args_env_overload=env)
        envdb = get_model_env_db()
        envdb.append(env_db_entry)
        envdb.save()
        # setup the conda env from file
        logger.info("Creating conda env from file: {0}".format(env_file))
        kipoi_conda.create_env_from_file(env_file, use_stdout=args.verbose)
        env_db_entry.successful = True
        # env is the environment name
        env_db_entry.cli_path = get_kipoi_bin(env)
        get_model_env_db().save()
        logger.info("Done!")
        print("\nActivate the environment via:")
        print("conda activate {0}".format(env))
    else:
        print("Dry run. Conda file path: {}".format(env_file))
def cli_test_source(command, raw_args):
    """Runs test on the model.

    NOTE(review): this is an older duplicate definition of
    ``cli_test_source`` (see the variant with --singularity/--common_env
    support earlier in the file). Whichever definition appears later in
    the module shadows the other at import time -- one of them should be
    removed.

    # Arguments
        command (str): must be "test-source"
        raw_args (list): CLI arguments after the sub-command
    """
    assert command == "test-source"
    # setup the arg-parsing
    parser = argparse.ArgumentParser('kipoi {}'.format(command),
                                     description='Test models in model source')
    parser.add_argument('source', default="kipoi",
                        help='Which source to test')
    parser.add_argument('--git-range', nargs='+',
                        help='''Git range (e.g. commits or something like "master HEAD" to check commits in HEAD vs master, or just "HEAD" to include uncommitted changes). All models modified within this range will be tested.''')
    parser.add_argument('-n', '--dry_run', action='store_true',
                        help='Dont run model testing')
    parser.add_argument('-b', '--batch_size', default=4, type=int,
                        help='Batch size')
    parser.add_argument('-x', '--exitfirst', action='store_true',
                        help='exit instantly on first error or failed test.')
    parser.add_argument('-k', default=None,
                        help='only run tests which match the given substring expression')
    parser.add_argument('-c', '--clean_env', action='store_true',
                        help='clean the environment after running.')
    parser.add_argument('--vep', action='store_true',
                        help='Install the vep dependency.')
    parser.add_argument('--all', action='store_true',
                        help="Test all models in the source")
    args = parser.parse_args(raw_args)
    # --------------------------------------------
    source = kipoi.get_source(args.source)
    all_models = all_models_to_test(source)
    # NOTE(review): help text says "substring expression" but re.match
    # anchors at the start of the name -- confirm intended semantics.
    if args.k is not None:
        all_models = [x for x in all_models if re.match(args.k, x)]
    if len(all_models) == 0:
        logger.info("No models found in the source")
        sys.exit(1)
    if args.all:
        test_models = all_models
        logger.info('Testing all models:\n- {0}'.
                    format('\n- '.join(test_models)))
    else:
        test_models = restrict_models_to_test(all_models, source, args.git_range)
        if len(test_models) == 0:
            logger.info("No model modified according to git, exiting.")
            sys.exit(0)
        logger.info('{0}/{1} models modified according to git:\n- {2}'.
                    format(len(test_models), len(all_models),
                           '\n- '.join(test_models)))
    # Sort the models alphabetically
    test_models = sorted(test_models)
    # Parse the repo config
    cfg_path = get_file_path(source.local_path, "config",
                             extensions=[".yml", ".yaml"],
                             raise_err=False)
    if cfg_path is not None:
        cfg = kipoi.specs.SourceConfig.load(cfg_path, append_path=False)
        logger.info("Found config {0}:\n{1}".format(cfg_path, cfg))
    else:
        cfg = None
    if args.dry_run:
        logger.info("-n/--dry_run enabled. Skipping model testing and exiting.")
        sys.exit(0)
    # TODO - make sure the modes are always tested in the same order?
    #      - make sure the keras config doesn't get cluttered
    logger.info("Running {0} tests..".format(len(test_models)))
    failed_models = []
    for i in range(len(test_models)):
        m = test_models[i]
        print('-' * 20)
        print("{0}/{1} - model: {2}".format(i + 1, len(test_models), m))
        print('-' * 20)
        try:
            env_name = conda_env_name(m, source=args.source)
            env_name = "test-" + env_name  # prepend "test-"
            test_model(m, args.source, env_name,
                       get_batch_size(cfg, m, args.batch_size),
                       args.vep)
        except Exception as e:
            logger.error("Model {0} failed: {1}".format(m, e))
            failed_models += [m]
            if args.exitfirst:
                # NOTE(review): sys.exit triggers the finally block too, so
                # with --clean_env the env is removed twice here.
                if args.clean_env:
                    rm_env(env_name)
                sys.exit(1)
        finally:
            if args.clean_env:
                rm_env(env_name)
    print('-' * 40)
    if failed_models:
        logger.error("{0}/{1} tests failed for models:\n- {2}".
                     format(len(failed_models), len(test_models),
                            "\n- ".join(failed_models)))
        sys.exit(1)
    logger.info('All tests ({0}) passed'.format(len(test_models)))
def get_model_list(source):
    """Cache for kipoi's list models: return the model listing of *source*."""
    return kipoi.get_source(source).list_models()