def test_force_overwrite_asset(self, cfg_file, gname, aname, tname):
    """
    Check that adding the same asset with ``force=True`` returns a truthy
    value on two consecutive calls.
    """
    conf = RefGenConf(filepath=cfg_file)
    asset_dir = conf.seek(
        genome_name=gname,
        asset_name="fasta",
        tag_name=tname,
        enclosing_dir=True,
    )
    # Issue the identical forced add twice; both calls have to report success.
    for _attempt in range(2):
        assert conf.add(asset_dir, gname, aname, tname, force=True)
def test_cant_add_without_digest_set_first(self, cfg_file, gname, aname, tname):
    """
    Check that ``add`` returns a falsy value when the target genome name is
    the fixture genome name with a ``_new`` suffix appended.
    """
    conf = RefGenConf(filepath=cfg_file)
    source_dir = conf.seek(
        genome_name=gname,
        asset_name="fasta",
        tag_name=tname,
        enclosing_dir=True,
    )
    # Derive the suffixed genome name instead of rebinding the parameter.
    suffixed_genome = gname + "_new"
    added = conf.add(source_dir, suffixed_genome, aname, tname)
    assert not added
def test_nofile(self, cfg_file, gname, aname, tname):
    """
    Check that a forced ``add`` with the custom seek keys ``{"file": "b"}``
    returns a truthy value.
    """
    conf = RefGenConf(filepath=cfg_file)
    asset_dir = conf.seek(
        genome_name=gname,
        asset_name="fasta",
        tag_name=tname,
        enclosing_dir=True,
    )
    custom_seek_keys = {"file": "b"}
    added = conf.add(
        asset_dir, gname, aname, tname, seek_keys=custom_seek_keys, force=True
    )
    assert added
def test_pull_asset_updates_genome_config(cfg_file, gname, aname, tname):
    """
    Test that two config objects that were identical before an asset pull
    differ afterwards, and that a config freshly read from the same file can
    seek the pulled asset.
    """
    untouched_conf = RefGenConf(filepath=cfg_file, writable=False)
    pulling_conf = RefGenConf(filepath=cfg_file, writable=False)

    # Start both objects from the same state: asset removed from each.
    remove_asset_and_file(pulling_conf, gname, aname, tname)
    remove_asset_and_file(untouched_conf, gname, aname, tname)
    assert untouched_conf.to_dict() == pulling_conf.to_dict()

    # Any yes/no prompt issued during the pull is answered with "yes".
    confirm_prompt = mock.patch(
        "refgenconf.refgenconf.query_yes_no", return_value=True
    )
    with confirm_prompt:
        print(
            "\nPulling; genome: {}, asset: {}, tag: {}\n".format(
                gname, aname, tname
            )
        )
        pulling_conf.pull(gname, aname, tname)

    # Only the object that performed the pull should have changed.
    assert not untouched_conf.to_dict() == pulling_conf.to_dict()

    # A new object built from the file must be able to seek the asset.
    reloaded_conf = RefGenConf(filepath=cfg_file, writable=False)
    sought_path = reloaded_conf.seek(gname, aname, tname)
    assert isinstance(sought_path, str)
def test_preexisting_asset_prompt(self, cfg_file, gname, aname, tname):
    """
    Check that ``add`` returns a falsy value for an asset sought from the
    same config when the yes/no prompt is patched to answer "no".
    """
    conf = RefGenConf(filepath=cfg_file)
    existing_path = conf.seek(
        genome_name=gname, asset_name=aname, tag_name=tname
    )
    decline_prompt = mock.patch(
        "refgenconf.refgenconf.query_yes_no", return_value=False
    )
    with decline_prompt:
        added = conf.add(existing_path, gname, aname, tname)
        assert not added
def main():
    """
    Primary workflow.

    Parse the command line, set up the module-level logger, determine the
    genome configuration file, build the list of requested assets from the
    registry paths, and dispatch to the branch matching ``args.command``.

    The process is terminated with ``sys.exit`` when no command is given,
    when a registry path lacks a genome, and on several command-specific
    validation failures.

    :raise MissingGenomeConfigError: if no genome config file is determined
    :raise MissingFolderError: if the genome folder is missing when pulling
    :raise NotImplementedError: if more than one asset is given to the
        insert or tag commands, or a seek_key is given to the remove command
    """
    parser = logmuse.add_logging_options(build_argparser())
    args, _ = parser.parse_known_args()
    global _LOGGER
    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
    _LOGGER.debug("refgenie {}".format(__version__))
    _LOGGER.debug("Args: {}".format(args))

    if not args.command:
        parser.print_help()
        _LOGGER.error("No command given")
        sys.exit(1)

    # The config file is allowed to be missing only when initializing one.
    gencfg = refgenconf.select_genome_config(
        filename=args.genome_config,
        check_exist=args.command != INIT_CMD,
        on_missing=lambda fp: fp,
        strict_env=True,
    )
    if gencfg is None:
        raise MissingGenomeConfigError(args.genome_config)
    _LOGGER.debug("Determined genome config: {}".format(gencfg))

    # From user input we want to construct a list of asset dicts, where each
    # asset has a genome name, asset name, and tag
    if "asset_registry_paths" in args and args.asset_registry_paths:
        _LOGGER.debug(
            "Found registry_path: {}".format(args.asset_registry_paths)
        )
        asset_list = [
            parse_registry_path(x) for x in args.asset_registry_paths
        ]
        for a in asset_list:
            # every asset must have a genome, either provided via registry
            # path or the args.genome arg.
            if not a["genome"]:
                if args.genome:
                    a["genome"] = args.genome
                else:
                    _LOGGER.error(
                        "Provided asset registry path ({}/{}:{}) is invalid. See help for usage reference.".format(
                            a["genome"], a["asset"], a["tag"]
                        )
                    )
                    sys.exit(1)
            else:
                if args.genome and args.genome != a["genome"]:
                    # Logger.warn is a deprecated alias; use warning().
                    _LOGGER.warning(
                        "Two different genomes specified for asset '{}'.".format(
                            a["asset"]
                        )
                    )
    else:
        # NOTE(review): parser.error presumably terminates the process on
        # its own; the explicit exits are kept as a safeguard.
        if args.command in GENOME_ONLY_REQUIRED and not args.genome:
            parser.error("You must provide either a genome or a registry path")
            sys.exit(1)
        if args.command in ASSET_REQUIRED:
            parser.error("You must provide an asset registry path")
            sys.exit(1)

    if args.command == INIT_CMD:
        _LOGGER.debug("Initializing refgenie genome configuration")
        rgc = RefGenConf(
            entries=OrderedDict(
                {
                    CFG_VERSION_KEY: REQ_CFG_VERSION,
                    CFG_FOLDER_KEY: os.path.dirname(os.path.abspath(gencfg)),
                    CFG_SERVERS_KEY: args.genome_server or [DEFAULT_SERVER],
                    CFG_GENOMES_KEY: None,
                }
            )
        )
        rgc.initialize_config_file(os.path.abspath(gencfg))

    elif args.command == BUILD_CMD:
        if not all(
            x["genome"] == asset_list[0]["genome"] for x in asset_list
        ):
            _LOGGER.error("Build can only build assets for one genome")
            sys.exit(1)
        recipe_name = None
        if args.recipe:
            if len(asset_list) > 1:
                _LOGGER.error(
                    "Recipes cannot be specified for multi-asset builds"
                )
                sys.exit(1)
            recipe_name = args.recipe
        if args.requirements:
            # Only report the recipe requirements, then exit without building.
            for a in asset_list:
                recipe = recipe_name or a["asset"]
                if recipe not in asset_build_packages.keys():
                    _raise_missing_recipe_error(recipe)
                _LOGGER.info("'{}' recipe requirements: ".format(recipe))
                _make_asset_build_reqs(recipe)
            sys.exit(0)
        refgenie_build(
            gencfg, asset_list[0]["genome"], asset_list, recipe_name, args
        )

    elif args.command == GET_ASSET_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        # A falsy flag is passed on as None rather than False.
        check = args.check_exists or None
        for a in asset_list:
            _LOGGER.debug(
                "getting asset: '{}/{}.{}:{}'".format(
                    a["genome"], a["asset"], a["seek_key"], a["tag"]
                )
            )
            print(
                rgc.seek(
                    a["genome"],
                    a["asset"],
                    a["tag"],
                    a["seek_key"],
                    strict_exists=check,
                )
            )
        return

    elif args.command == INSERT_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if len(asset_list) > 1:
            raise NotImplementedError("Can only add 1 asset at a time")
        refgenie_add(rgc, asset_list[0], args.path, args.force)

    elif args.command == PULL_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        # A missing force flag is passed on as None rather than False.
        force = True if args.force else None
        outdir = rgc[CFG_FOLDER_KEY]
        if not os.path.exists(outdir):
            raise MissingFolderError(outdir)
        target = _key_to_name(CFG_FOLDER_KEY)
        if not perm_check_x(outdir, target):
            return
        if not _single_folder_writeable(outdir):
            _LOGGER.error(
                "Insufficient permissions to write to {}: {}".format(
                    target, outdir
                )
            )
            return
        for a in asset_list:
            rgc.pull(
                a["genome"],
                a["asset"],
                a["tag"],
                unpack=not args.no_untar,
                force=force,
            )

    elif args.command in [LIST_LOCAL_CMD, LIST_REMOTE_CMD]:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if args.command == LIST_REMOTE_CMD:
            num_servers = 0
            # Keep all servers so that child updates maintain server list
            server_list = rgc[CFG_SERVERS_KEY]
            bad_servers = []
            for server_url in server_list:
                num_servers += 1
                try:
                    # Point the config at one server at a time for listing.
                    rgc[CFG_SERVERS_KEY] = server_url
                    pfx, genomes, assets, recipes = _exec_list(
                        rgc, args.command == LIST_REMOTE_CMD, args.genome
                    )
                    if assets is None and genomes is None:
                        continue
                    _LOGGER.info("{} genomes: {}".format(pfx, genomes))
                    if args.command != LIST_REMOTE_CMD:  # Not implemented yet
                        _LOGGER.info("{} recipes: {}".format(pfx, recipes))
                    _LOGGER.info("{} assets:\n{}\n".format(pfx, assets))
                except (DownloadJsonError, ConnectionError):
                    bad_servers.append(server_url)
                    continue
            if num_servers >= len(server_list) and bad_servers:
                _LOGGER.error(
                    "Could not list assets from the following server(s): {}".format(
                        bad_servers
                    )
                )
            # Restore original server list, even when we couldn't find
            # assets on a server
            rgc[CFG_SERVERS_KEY] = server_list
        else:  # Only check local assets once
            _LOGGER.info(
                "Server subscriptions: {}".format(
                    ", ".join(rgc[CFG_SERVERS_KEY])
                )
            )
            pfx, genomes, assets, recipes = _exec_list(
                rgc, args.command == LIST_REMOTE_CMD, args.genome
            )
            _LOGGER.info("{} genomes: {}".format(pfx, genomes))
            if args.command != LIST_REMOTE_CMD:  # Not implemented yet
                _LOGGER.info("{} recipes: {}".format(pfx, recipes))
            _LOGGER.info("{} assets:\n{}".format(pfx, assets))

    elif args.command == GETSEQ_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        # NOTE(review): rgc is passed both as the bound instance and as the
        # first positional argument, and the result is discarded. This looks
        # unintended -- confirm against the RefGenConf.getseq signature.
        rgc.getseq(rgc, args.genome, args.locus)

    elif args.command == REMOVE_CMD:
        force = args.force
        rgc = RefGenConf(filepath=gencfg)
        for a in asset_list:
            a["tag"] = a["tag"] or rgc.get_default_tag(
                a["genome"], a["asset"], use_existing=False
            )
            _LOGGER.debug("Determined tag for removal: {}".format(a["tag"]))
            if a["seek_key"] is not None:
                raise NotImplementedError(
                    "You can't remove a specific seek_key."
                )
            bundle = [a["genome"], a["asset"], a["tag"]]
            try:
                # An incomplete asset is dropped from the config right away
                # and the command ends without processing further assets.
                if not rgc.is_asset_complete(*bundle):
                    with rgc as r:
                        r.cfg_remove_assets(*bundle)
                    _LOGGER.info(
                        "Removed an incomplete asset '{}/{}:{}'".format(
                            *bundle
                        )
                    )
                    return
            except (KeyError, MissingAssetError, MissingGenomeError):
                _LOGGER.info("Asset '{}/{}:{}' does not exist".format(*bundle))
                return
        if len(asset_list) > 1:
            # Confirm once for the whole batch, then force each removal.
            if not query_yes_no(
                "Are you sure you want to remove {} assets?".format(
                    len(asset_list)
                )
            ):
                _LOGGER.info("Action aborted by the user")
                return
            force = True
        for a in asset_list:
            rgc.remove(
                genome=a["genome"], asset=a["asset"], tag=a["tag"], force=force
            )

    elif args.command == TAG_CMD:
        rgc = RefGenConf(filepath=gencfg)
        if len(asset_list) > 1:
            raise NotImplementedError("Can only tag 1 asset at a time")
        # Name the single asset explicitly instead of relying on the loop
        # variable leaked from the registry-path processing above.
        asset = asset_list[0]
        if args.default:
            # set the default tag and exit
            with rgc as r:
                r.set_default_pointer(
                    asset["genome"], asset["asset"], asset["tag"], True
                )
            sys.exit(0)
        rgc.tag(asset["genome"], asset["asset"], asset["tag"], args.tag)

    elif args.command == ID_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if len(asset_list) == 1:
            # A single asset prints the bare identifier only.
            g, a = asset_list[0]["genome"], asset_list[0]["asset"]
            t = asset_list[0]["tag"] or rgc.get_default_tag(g, a)
            print(rgc.id(g, a, t))
            return
        for asset in asset_list:
            # Multiple assets print "<genome>/<asset>:<tag>,<id>" per line.
            g, a = asset["genome"], asset["asset"]
            t = asset["tag"] or rgc.get_default_tag(g, a)
            print("{}/{}:{},".format(g, a, t) + rgc.id(g, a, t))
        return

    elif args.command == SUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        rgc.subscribe(urls=args.genome_server, reset=args.reset)
        return

    elif args.command == UNSUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        rgc.unsubscribe(urls=args.genome_server)
        return
def main():
    """
    Primary workflow.

    Parse the command line, set up the module-level logger, determine the
    genome configuration file, build the list of requested assets from the
    registry paths, and dispatch to the branch matching ``args.command``.

    The process is terminated with ``sys.exit`` when no command (or no alias
    subcommand) is given, when a registry path lacks a genome, and on
    several command-specific validation failures.

    :raise MissingGenomeConfigError: if no genome config file is determined
    :raise FileNotFoundError: if the init settings JSON file does not exist
    :raise MissingFolderError: if the data directory is missing when pulling
    :raise NotImplementedError: if more than one asset is given to the
        insert or tag commands, or a seek_key is given to the remove command
    """
    parser = logmuse.add_logging_options(build_argparser())
    args, _ = parser.parse_known_args()
    global _LOGGER
    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
    _LOGGER.debug(f"versions: refgenie {__version__} | refgenconf {rgc_version}")
    _LOGGER.debug(f"Args: {args}")

    if not args.command:
        parser.print_help()
        _LOGGER.error("No command given")
        sys.exit(1)

    if args.command == ALIAS_CMD and not args.subcommand:
        parser.print_help()
        _LOGGER.error("No alias subcommand command given")
        sys.exit(1)

    # The config file is allowed to be missing only when initializing one.
    gencfg = select_genome_config(
        filename=args.genome_config,
        check_exist=args.command != INIT_CMD,
        on_missing=lambda fp: fp,
        strict_env=True,
    )
    if gencfg is None:
        raise MissingGenomeConfigError(args.genome_config)
    _LOGGER.debug("Determined genome config: {}".format(gencfg))

    skip_read_lock = _skip_lock(args.skip_read_lock, gencfg)

    # From user input we want to construct a list of asset dicts, where each
    # asset has a genome name, asset name, and tag
    if "asset_registry_paths" in args and args.asset_registry_paths:
        _LOGGER.debug("Found registry_path: {}".format(args.asset_registry_paths))
        asset_list = [parse_registry_path(x) for x in args.asset_registry_paths]
        for a in asset_list:
            # every asset must have a genome, either provided via registry
            # path or the args.genome arg.
            if not a["genome"]:
                if args.genome:
                    a["genome"] = args.genome
                else:
                    _LOGGER.error(
                        "Provided asset registry path ({}/{}:{}) is invalid. See help for usage reference.".format(
                            a["genome"], a["asset"], a["tag"]
                        )
                    )
                    sys.exit(1)
            else:
                if args.genome and args.genome != a["genome"]:
                    # Logger.warn is a deprecated alias; use warning().
                    _LOGGER.warning(
                        "Two different genomes specified for asset '{}'.".format(
                            a["asset"]
                        )
                    )
    else:
        # NOTE(review): parser.error presumably terminates the process on
        # its own; the explicit exits are kept as a safeguard.
        if args.command in GENOME_ONLY_REQUIRED and not args.genome:
            parser.error("You must provide either a genome or a registry path")
            sys.exit(1)
        if args.command in ASSET_REQUIRED:
            parser.error("You must provide an asset registry path")
            sys.exit(1)

    if args.command == INIT_CMD:
        _LOGGER.debug("Initializing refgenie genome configuration")
        entries = OrderedDict(
            {
                CFG_VERSION_KEY: REQ_CFG_VERSION,
                CFG_FOLDER_KEY: os.path.dirname(os.path.abspath(gencfg)),
                CFG_SERVERS_KEY: args.genome_server or [DEFAULT_SERVER],
                CFG_GENOMES_KEY: None,
            }
        )
        # Settings are layered: defaults, then the JSON file, then the
        # individual command line options, each overriding the previous.
        if args.settings_json:
            if os.path.isfile(args.settings_json):
                with open(args.settings_json, "r") as json_file:
                    data = json.load(json_file)
                entries.update(data)
            else:
                raise FileNotFoundError(
                    "JSON file with config init settings does not exist: {}".format(
                        args.settings_json
                    )
                )
        if args.genome_folder:
            entries[CFG_FOLDER_KEY] = args.genome_folder
        if args.remote_url_base:
            entries[CFG_REMOTE_URL_BASE_KEY] = args.remote_url_base
        if args.genome_archive_folder:
            entries[CFG_ARCHIVE_KEY] = args.genome_archive_folder
        if args.genome_archive_config:
            entries[CFG_ARCHIVE_CONFIG_KEY] = args.genome_archive_config
        _LOGGER.debug("initializing with entries: {}".format(entries))
        rgc = RefGenConf(entries=entries, skip_read_lock=skip_read_lock)
        rgc.initialize_config_file(os.path.abspath(gencfg))

    elif args.command == BUILD_CMD:
        if not all(x["genome"] == asset_list[0]["genome"] for x in asset_list):
            _LOGGER.error("Build can only build assets for one genome")
            sys.exit(1)
        recipe_name = None
        if args.recipe:
            if len(asset_list) > 1:
                _LOGGER.error("Recipes cannot be specified for multi-asset builds")
                sys.exit(1)
            recipe_name = args.recipe
        if args.requirements:
            # Only report the recipe requirements, then exit without building.
            for a in asset_list:
                recipe = recipe_name or a["asset"]
                if recipe not in asset_build_packages.keys():
                    _raise_missing_recipe_error(recipe)
                _LOGGER.info("'{}' recipe requirements: ".format(recipe))
                _make_asset_build_reqs(recipe)
            sys.exit(0)
        refgenie_build(gencfg, asset_list[0]["genome"], asset_list, recipe_name, args)

    elif args.command == GET_ASSET_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        # A falsy flag is passed on as None rather than False.
        check = args.check_exists or None
        for a in asset_list:
            _LOGGER.debug(
                "getting asset: '{}/{}.{}:{}'".format(
                    a["genome"], a["asset"], a["seek_key"], a["tag"]
                )
            )
            print(
                rgc.seek(
                    a["genome"],
                    a["asset"],
                    a["tag"],
                    a["seek_key"],
                    strict_exists=check,
                )
            )
        return

    elif args.command == INSERT_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        if len(asset_list) > 1:
            raise NotImplementedError("Can only add 1 asset at a time")
        # Seek keys arrive as a JSON string on the command line.
        sk = args.seek_keys
        if sk:
            sk = json.loads(args.seek_keys)
        rgc.add(
            path=args.path,
            genome=asset_list[0]["genome"],
            asset=asset_list[0]["asset"],
            tag=asset_list[0]["tag"],
            seek_keys=sk,
            force=args.force,
        )

    elif args.command == PULL_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)

        # existing assets overwriting
        if args.no_overwrite:
            force = False
        elif args.force_overwrite:
            force = True
        else:
            force = None
        # large archive pulling
        if args.no_large:
            force_large = False
        elif args.pull_large:
            force_large = True
        else:
            force_large = None
        # batch mode takes precedence over other choices
        if args.batch:
            force_large = True
            force = False

        outdir = rgc.data_dir
        if not os.path.exists(outdir):
            raise MissingFolderError(outdir)
        if not perm_check_x(outdir):
            return
        if not _single_folder_writeable(outdir):
            _LOGGER.error("Insufficient permissions to write to: {}".format(outdir))
            return

        for a in asset_list:
            rgc.pull(
                a["genome"],
                a["asset"],
                a["tag"],
                force=force,
                force_large=force_large,
                size_cutoff=args.size_cutoff,
            )

    elif args.command in [LIST_LOCAL_CMD, LIST_REMOTE_CMD]:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        console = Console()
        if args.command == LIST_REMOTE_CMD:
            num_servers = 0
            bad_servers = []
            for server_url in rgc[CFG_SERVERS_KEY]:
                num_servers += 1
                try:
                    table = rgc.get_asset_table(
                        genomes=args.genome, server_url=server_url
                    )
                except (DownloadJsonError, ConnectionError, MissingSchema):
                    # Remember the failing server and keep trying the rest.
                    bad_servers.append(server_url)
                    continue
                else:
                    console.print(table)
            if num_servers >= len(rgc[CFG_SERVERS_KEY]) and bad_servers:
                _LOGGER.error(
                    "Could not list assets from the following servers: {}".format(
                        bad_servers
                    )
                )
        else:
            if args.recipes:
                print(", ".join(sorted(asset_build_packages.keys())))
            else:
                console.print(rgc.get_asset_table(genomes=args.genome))

    elif args.command == GETSEQ_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        print(rgc.getseq(args.genome, args.locus))

    elif args.command == REMOVE_CMD:
        force = args.force
        rgc = RefGenConf(filepath=gencfg, skip_read_lock=skip_read_lock)
        for a in asset_list:
            a["tag"] = a["tag"] or rgc.get_default_tag(
                a["genome"], a["asset"], use_existing=False
            )
            _LOGGER.debug("Determined tag for removal: {}".format(a["tag"]))
            if a["seek_key"] is not None:
                raise NotImplementedError("You can't remove a specific seek_key.")
            gat = {"genome": a["genome"], "asset": a["asset"], "tag": a["tag"]}
            try:
                # An incomplete asset is dropped from the config right away
                # and the command ends without processing further assets.
                if not rgc.is_asset_complete(**gat):
                    with rgc as r:
                        r.cfg_remove_assets(**gat)
                    # The template uses named fields, so the dict has to be
                    # expanded as keyword arguments. Positional expansion
                    # raised a KeyError that the handler below swallowed,
                    # misreporting the removal as "does not exist".
                    _LOGGER.info(
                        "Removed an incomplete asset "
                        "'{genome}/{asset}:{tag}'".format(**gat)
                    )
                    return
            except (KeyError, MissingAssetError, MissingGenomeError):
                _LOGGER.info(
                    "Asset '{genome}/{asset}:{tag}' does not exist".format(**gat)
                )
                return
        if len(asset_list) > 1:
            # Confirm once for the whole batch, then force each removal.
            if not query_yes_no(
                "Are you sure you want to remove {} assets?".format(len(asset_list))
            ):
                _LOGGER.info("Action aborted by the user")
                return
            force = True
        for a in asset_list:
            rgc.remove(genome=a["genome"], asset=a["asset"], tag=a["tag"], force=force)

    elif args.command == TAG_CMD:
        rgc = RefGenConf(filepath=gencfg, skip_read_lock=skip_read_lock)
        if len(asset_list) > 1:
            raise NotImplementedError("Can only tag 1 asset at a time")
        # Name the single asset explicitly instead of relying on the loop
        # variable leaked from the registry-path processing above.
        asset = asset_list[0]
        if args.default:
            # set the default tag and exit
            with rgc as r:
                r.set_default_pointer(
                    asset["genome"], asset["asset"], asset["tag"], True
                )
            sys.exit(0)
        rgc.tag(
            asset["genome"], asset["asset"], asset["tag"], args.tag, force=args.force
        )

    elif args.command == ID_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        if len(asset_list) == 1:
            # A single asset prints the bare identifier only.
            g, a = asset_list[0]["genome"], asset_list[0]["asset"]
            t = asset_list[0]["tag"] or rgc.get_default_tag(g, a)
            print(rgc.id(g, a, t))
            return
        for asset in asset_list:
            # Multiple assets print "<genome>/<asset>:<tag>,<id>" per line.
            g, a = asset["genome"], asset["asset"]
            t = asset["tag"] or rgc.get_default_tag(g, a)
            print("{}/{}:{},".format(g, a, t) + rgc.id(g, a, t))
        return

    elif args.command == SUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        rgc.subscribe(urls=args.genome_server, reset=args.reset)
        return

    elif args.command == UNSUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        rgc.unsubscribe(urls=args.genome_server)
        return

    elif args.command == ALIAS_CMD:
        rgc = RefGenConf(filepath=gencfg, skip_read_lock=skip_read_lock)
        if args.subcommand == ALIAS_GET_CMD:
            if args.aliases is not None:
                for alias in args.aliases:
                    print(rgc.get_genome_alias_digest(alias=alias))
                return
            # Without specific aliases, show the whole alias table.
            console = Console()
            console.print(rgc.genome_aliases_table)

        if args.subcommand == ALIAS_SET_CMD:
            rgc.set_genome_alias(
                digest=args.digest,
                genome=args.aliases,
                reset_digest=args.reset,
                create_genome=args.force,
            )
            return
        elif args.subcommand == ALIAS_REMOVE_CMD:
            rgc.remove_genome_aliases(digest=args.digest, aliases=args.aliases)
            return

    elif args.command == COMPARE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        res = rgc.compare(
            args.genome1[0], args.genome2[0], explain=not args.no_explanation
        )
        # The raw result is printed only when the explanation is turned off.
        if args.no_explanation:
            print(res)

    elif args.command == UPGRADE_CMD:
        upgrade_config(
            target_version=args.target_version, filepath=gencfg, force=args.force
        )
def test_nofile(self, cfg_file, gname, aname, tname):
    """
    Check that a config object created without arguments returns a truthy
    value when adding a path sought from the file-backed config, using the
    custom seek keys ``{"file": "b"}``.
    """
    source_conf = RefGenConf(filepath=cfg_file)
    asset_dir = source_conf.seek(gname, aname, tname, enclosing_dir=True)
    # The receiving object is created with no filepath argument.
    fileless_conf = RefGenConf()
    custom_seek_keys = {"file": "b"}
    added = fileless_conf.add(
        asset_dir, gname, aname, tname, seek_keys=custom_seek_keys
    )
    assert added