def list_models(base_models):
    import kipoi
    dtm = kipoi.list_models()
    dtm = dtm[~dtm.model.str.contains("template")]
    dtm = dtm[dtm.source == 'kipoi']
    dtm = dtm[dtm.model.str.contains("|".join(base_models))]
    # use only the CpGenie/merged model
    dtm = dtm[~(dtm.model.str.contains("CpGenie") & ~dtm.model.str.contains("merged"))]
    MODELS = list(dtm.model)
    return MODELS
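# Hedged usage sketch (added for illustration, not part of the original source):
# `base_models` is a list of substrings matched against model names, so the call
# below would collect all Basset and DeepBind models from the 'kipoi' source.
# The base-model names are assumptions taken from models referenced elsewhere in
# this file.
def _example_list_models_usage():
    selected = list_models(["Basset", "DeepBind"])
    # each entry is a model name such as "Basset" or
    # "DeepBind/Homo_sapiens/TF/D00328.018_ChIP-seq_CTCF"
    return selected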
def test_list_models():
    k = kipoi.config.get_source("kipoi")

    df = k.list_models()
    assert isinstance(df, pd.DataFrame)

    # column names
    df_model_columns = ['model', 'version', 'authors', 'contributors', 'doc', 'type',
                        'inputs', 'targets', 'veff_score_variants', 'license',
                        'cite_as', 'trained_on', 'training_procedure', 'tags']
    assert df_model_columns == list(df.columns)

    # listing across all sources prepends a 'source' column
    df_all = kipoi.list_models()
    assert ["source"] + df_model_columns == list(df_all.columns)

    kipoi.get_model_descr("extended_coda")
    kipoi.get_model_descr("extended_coda", source="kipoi")

    # local files
    kipoi.get_model_descr("example/models/extended_coda", source="dir")
def cli_create(cmd, raw_args):
    """Create a conda environment for a model
    """
    from kipoi_conda import get_kipoi_bin
    import uuid
    parser = argparse.ArgumentParser(
        'kipoi env {}'.format(cmd),
        description='Create a conda environment for a specific model.')
    add_env_args(parser)
    parser.add_argument('-e', '--env', default=None,
                        help="Special environment name. default: kipoi-<model>[-<dataloader>]")
    parser.add_argument('--dry-run', action='store_true',
                        help="Don't actually create the environment")
    parser.add_argument('-t', '--tmpdir', default=None,
                        help=("Temporary directory path where to create the conda environment file. "
                              "Defaults to /tmp/kipoi/envfiles/<uuid>/"))
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="Increase output verbosity. Show conda stdout.")
    args = parser.parse_args(raw_args)

    # create the tmp dir
    if args.tmpdir is None:
        tmpdir = "/tmp/kipoi/envfiles/" + str(uuid.uuid4())[:8]
    else:
        tmpdir = args.tmpdir
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)

    # write the env file
    logger.info("Writing environment file: {0}".format(tmpdir))

    if args.model == ['all']:
        from kipoi.cli.source_test import get_common_env
        src = kipoi.get_source(args.source)
        model_envs = yaml.safe_load(
            open(os.path.join(src.local_path, SPECIAL_ENV_PREFIX, "models.yaml")))

        # TODO - test this by mocking up the CLI command execution

        # setup the args for all the models
        df = kipoi.list_models()
        dfg = list_models_by_group(df, "")
        for model_group in dfg.group.unique().tolist():
            existing_envs = get_envs_by_model(model_group, args.source, only_valid=True)
            if existing_envs or existing_envs is None:
                # No need to create the environment
                existing_envs_str = "\n".join([e.create_args.env for e in existing_envs])
                logger.info("Environment for {} already exists ({}). Skipping installation".
                            format(model_group, existing_envs_str))
                continue
            logger.info("Environment doesn't exist for model group {}. Installing it".
                        format(model_group))

            # Figure out which <model> to use for installation
            common_env = get_common_env(model_group, model_envs)
            if common_env is not None:
                # common environment exists for the model. Use it
                logger.info("Using common environment: {}".format(common_env))
                model_group = os.path.join(SPECIAL_ENV_PREFIX, common_env)

            # Run cli_create
            def optional_replace(x, ref, alt):
                if x == ref:
                    return alt
                else:
                    return x
            new_raw_args = [optional_replace(x, 'all', model_group)
                            for x in raw_args if x is not None]
            cli_create(cmd, new_raw_args)
        logger.info("Done installing all environments!")
        return None

    env, env_file = export_env(args.model,
                               args.dataloader,
                               args.source,
                               env_file=None,
                               env_dir=tmpdir,
                               env=args.env,
                               vep=args.vep,
                               interpret=args.interpret,
                               gpu=args.gpu)

    if not args.dry_run:
        env_db_entry = generate_env_db_entry(args, args_env_overload=env)
        envdb = get_model_env_db()
        envdb.append(env_db_entry)
        envdb.save()

        # setup the conda env from file
        logger.info("Creating conda env from file: {0}".format(env_file))
        kipoi_conda.create_env_from_file(env_file, use_stdout=args.verbose)
        env_db_entry.successful = True

        # env is environment name
        env_db_entry.cli_path = get_kipoi_bin(env)
        get_model_env_db().save()
        logger.info("Done!")
        print("\nActivate the environment via:")
        print("conda activate {0}".format(env))
    else:
        print("Dry run. Conda file path: {}".format(env_file))
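# Hedged usage sketch (added for illustration, not part of the original source):
# `cli_create` is normally reached through the `kipoi env create` CLI entry point;
# calling it directly with the raw argument strings is equivalent. The model name
# and the use of --dry-run below are assumptions chosen for the example.
def _example_cli_create_dry_run():
    # roughly equivalent to: kipoi env create Basset --dry-run
    cli_create("create", ["Basset", "--dry-run"])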
def df():
    return kipoi.list_models()
                                        source='kipoi')
    assert files == ['fasta_file']
    assert isinstance(schema, dict)
    assert len(schema) == 919

    schema, files = get_dataloader_descr("Basset", source='kipoi')
    assert files == ['fasta_file']
    assert isinstance(schema, dict)
    assert len(schema) == 164

    schema, files = get_dataloader_descr(
        "DeepBind/Homo_sapiens/TF/D00328.018_ChIP-seq_CTCF", source='kipoi')
    assert (schema, files) == ('float32', ['fasta_file'])

    df = kipoi.list_models()
    # get all models supporting variant effect prediction
    # TODO: The following code will not work as is. Remove it?
    dfv = df[df.veff_score_variants]
    # not sure why this wasn't kicked out
    dfv = dfv[~dfv.model.str.startswith("extended_coda")]
    # DeepCpG_DNA has variable length output which I don't know how to handle
    dfv = dfv[~dfv.model.str.startswith("DeepCpG_DNA")]
    # append the annotation for Hail (requires `from joblib import Parallel, delayed`)
    dfv['hail_annotation'] = Parallel(4)(delayed(get_dataloader_descr)(m) for m in dfv.model)
def test_get_snippets():
    models = kipoi.list_models().model.unique()
    for model_name in models:
        assert isinstance(get_snippets(model_name), dict)
def cli_ls(cmd, raw_args):
    """List all models available from the configured Kipoi sources
    """
    dtm = kipoi.list_models()
    for m in list(dtm.source.str.cat(dtm.model, sep="::")):
        print(m)
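# Hedged usage sketch (added for illustration, not part of the original source):
# calling `cli_ls` prints one "<source>::<model>" line per available model,
# e.g. "kipoi::Basset".
def _example_cli_ls():
    cli_ls("ls", [])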