Example #1
0
def main():
    """
    Primary workflow.

    Parse the command line, set up the module-level logger, determine the
    genome configuration file and run the requested refgenie command.

    Asset registry paths given on the command line are parsed into a list of
    asset dicts; an asset that carries no genome falls back to ``args.genome``.
    The process exits with status 1 when no command is given, when an asset
    has no genome at all, or when a build request is inconsistent.

    :raise MissingGenomeConfigError: if no genome configuration file could be
        determined
    :raise NotImplementedError: if more than one asset is passed to the
        insert or tag command, or if removal of a specific seek_key is
        requested
    :raise MissingFolderError: if the genome folder needed by the pull
        command does not exist
    """
    parser = logmuse.add_logging_options(build_argparser())
    args, remaining_args = parser.parse_known_args()
    global _LOGGER
    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
    _LOGGER.debug("refgenie {}".format(__version__))
    _LOGGER.debug("Args: {}".format(args))

    if not args.command:
        parser.print_help()
        _LOGGER.error("No command given")
        sys.exit(1)

    gencfg = refgenconf.select_genome_config(
        filename=args.genome_config,
        # the init command creates the file, so it need not exist yet
        check_exist=not args.command == INIT_CMD,
        on_missing=lambda fp: fp,
        strict_env=True)
    if gencfg is None:
        raise MissingGenomeConfigError(args.genome_config)
    _LOGGER.debug("Determined genome config: {}".format(gencfg))

    # From user input we want to construct a list of asset dicts, where each
    # asset has a genome name, asset name, and tag

    if "asset_registry_paths" in args and args.asset_registry_paths:
        _LOGGER.debug("Found registry_path: {}".format(
            args.asset_registry_paths))
        asset_list = [
            parse_registry_path(x) for x in args.asset_registry_paths
        ]

        for a in asset_list:
            # every asset must have a genome, either provided via registry path
            # or the args.genome arg.
            if not a["genome"]:
                if args.genome:
                    a["genome"] = args.genome
                else:
                    _LOGGER.error(
                        "Provided asset registry path ({}/{}:{}) is invalid. See help for usage reference."
                        .format(a["genome"], a["asset"], a["tag"]))
                    sys.exit(1)
            else:
                if args.genome and args.genome != a["genome"]:
                    # Logger.warn is a deprecated alias of Logger.warning
                    _LOGGER.warning(
                        "Two different genomes specified for asset '{}'.".
                        format(a["asset"]))

    else:
        # NOTE(review): argparse's parser.error() normally terminates the
        # process by itself; the sys.exit calls are kept as a safeguard in
        # case build_argparser() returns a parser that overrides it.
        if args.command in GENOME_ONLY_REQUIRED and not args.genome:
            parser.error("You must provide either a genome or a registry path")
            sys.exit(1)
        if args.command in ASSET_REQUIRED:
            parser.error("You must provide an asset registry path")
            sys.exit(1)

    if args.command == INIT_CMD:
        _LOGGER.debug("Initializing refgenie genome configuration")
        rgc = RefGenConf(entries=OrderedDict(
            {
                CFG_VERSION_KEY: REQ_CFG_VERSION,
                CFG_FOLDER_KEY: os.path.dirname(os.path.abspath(gencfg)),
                CFG_SERVERS_KEY: args.genome_server or [DEFAULT_SERVER],
                CFG_GENOMES_KEY: None
            }))
        rgc.initialize_config_file(os.path.abspath(gencfg))

    elif args.command == BUILD_CMD:
        build_genome = asset_list[0]["genome"]
        if not all(x["genome"] == build_genome for x in asset_list):
            _LOGGER.error("Build can only build assets for one genome")
            sys.exit(1)
        recipe_name = None
        if args.recipe:
            if len(asset_list) > 1:
                _LOGGER.error(
                    "Recipes cannot be specified for multi-asset builds")
                sys.exit(1)
            recipe_name = args.recipe
        if args.requirements:
            # only report what each recipe requires, then quit without building
            for a in asset_list:
                recipe = recipe_name or a["asset"]
                if recipe not in asset_build_packages:
                    _raise_missing_recipe_error(recipe)
                _LOGGER.info("'{}' recipe requirements: ".format(recipe))
                _make_asset_build_reqs(recipe)
            sys.exit(0)
        refgenie_build(gencfg, build_genome, asset_list, recipe_name, args)

    elif args.command == GET_ASSET_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        # a falsy flag is passed on as None rather than False
        check = args.check_exists or None
        for a in asset_list:
            _LOGGER.debug("getting asset: '{}/{}.{}:{}'".format(
                a["genome"], a["asset"], a["seek_key"], a["tag"]))
            print(
                rgc.seek(a["genome"],
                         a["asset"],
                         a["tag"],
                         a["seek_key"],
                         strict_exists=check))
        return

    elif args.command == INSERT_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if len(asset_list) > 1:
            raise NotImplementedError("Can only add 1 asset at a time")
        refgenie_add(rgc, asset_list[0], args.path, args.force)

    elif args.command == PULL_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        # a falsy flag is passed on as None rather than False
        force = True if args.force else None
        outdir = rgc[CFG_FOLDER_KEY]
        if not os.path.exists(outdir):
            raise MissingFolderError(outdir)
        target = _key_to_name(CFG_FOLDER_KEY)
        if not perm_check_x(outdir, target):
            return
        if not _single_folder_writeable(outdir):
            _LOGGER.error("Insufficient permissions to write to {}: {}".format(
                target, outdir))
            return

        for a in asset_list:
            rgc.pull(a["genome"],
                     a["asset"],
                     a["tag"],
                     unpack=not args.no_untar,
                     force=force)

    elif args.command in [LIST_LOCAL_CMD, LIST_REMOTE_CMD]:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if args.command == LIST_REMOTE_CMD:
            # Keep all servers so that child updates maintain server list
            server_list = rgc[CFG_SERVERS_KEY]
            bad_servers = []
            try:
                for server_url in server_list:
                    try:
                        # query one server at a time
                        rgc[CFG_SERVERS_KEY] = server_url
                        pfx, genomes, assets, recipes = _exec_list(
                            rgc, True, args.genome)
                        if assets is None and genomes is None:
                            continue
                        _LOGGER.info("{} genomes: {}".format(pfx, genomes))
                        # recipes are not reported for remote listings
                        # (not implemented yet)
                        _LOGGER.info("{} assets:\n{}\n".format(pfx, assets))
                    except (DownloadJsonError, ConnectionError):
                        bad_servers.append(server_url)
                        continue
            finally:
                # Restore original server list, even when we couldn't find
                # assets on a server or an unexpected error interrupted us
                rgc[CFG_SERVERS_KEY] = server_list
            if bad_servers:
                _LOGGER.error(
                    "Could not list assets from the following server(s): {}".
                    format(bad_servers))
        else:  # Only check local assets once
            _LOGGER.info("Server subscriptions: {}".format(", ".join(
                rgc[CFG_SERVERS_KEY])))
            pfx, genomes, assets, recipes = _exec_list(
                rgc, False, args.genome)
            _LOGGER.info("{} genomes: {}".format(pfx, genomes))
            _LOGGER.info("{} recipes: {}".format(pfx, recipes))
            _LOGGER.info("{} assets:\n{}".format(pfx, assets))

    elif args.command == GETSEQ_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        # NOTE(review): rgc is passed both as the bound instance and as the
        # first positional argument -- confirm against RefGenConf.getseq's
        # signature; this looks like it should be getseq(genome, locus).
        rgc.getseq(rgc, args.genome, args.locus)

    elif args.command == REMOVE_CMD:
        force = args.force
        rgc = RefGenConf(filepath=gencfg)
        for a in asset_list:
            a["tag"] = a["tag"] or rgc.get_default_tag(
                a["genome"], a["asset"], use_existing=False)
            _LOGGER.debug("Determined tag for removal: {}".format(a["tag"]))
            if a["seek_key"] is not None:
                raise NotImplementedError(
                    "You can't remove a specific seek_key.")
            bundle = [a["genome"], a["asset"], a["tag"]]
            try:
                if not rgc.is_asset_complete(*bundle):
                    # incomplete assets are only dropped from the config
                    with rgc as r:
                        r.cfg_remove_assets(*bundle)
                    _LOGGER.info(
                        "Removed an incomplete asset '{}/{}:{}'".format(
                            *bundle))
                    return
            except (KeyError, MissingAssetError, MissingGenomeError):
                _LOGGER.info("Asset '{}/{}:{}' does not exist".format(*bundle))
                return
        if len(asset_list) > 1:
            # ask once for the whole batch, then skip per-asset confirmation
            if not query_yes_no(
                    "Are you sure you want to remove {} assets?".format(
                        len(asset_list))):
                _LOGGER.info("Action aborted by the user")
                return
            force = True
        for a in asset_list:
            rgc.remove(genome=a["genome"],
                       asset=a["asset"],
                       tag=a["tag"],
                       force=force)

    elif args.command == TAG_CMD:
        rgc = RefGenConf(filepath=gencfg)
        if len(asset_list) > 1:
            raise NotImplementedError("Can only tag 1 asset at a time")
        # bind the single asset explicitly instead of relying on the loop
        # variable leaked from the validation loop above
        a = asset_list[0]
        if args.default:
            # set the default tag and exit
            with rgc as r:
                r.set_default_pointer(a["genome"], a["asset"], a["tag"], True)
            sys.exit(0)
        rgc.tag(a["genome"], a["asset"], a["tag"], args.tag)

    elif args.command == ID_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if len(asset_list) == 1:
            # a single asset prints just the bare identifier
            g, a = asset_list[0]["genome"], asset_list[0]["asset"]
            t = asset_list[0]["tag"] or rgc.get_default_tag(g, a)
            print(rgc.id(g, a, t))
            return
        for asset in asset_list:
            g, a = asset["genome"], asset["asset"]
            t = asset["tag"] or rgc.get_default_tag(g, a)
            print("{}/{}:{},".format(g, a, t) + rgc.id(g, a, t))
        return
    elif args.command == SUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        rgc.subscribe(urls=args.genome_server, reset=args.reset)
        return
    elif args.command == UNSUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        rgc.unsubscribe(urls=args.genome_server)
        return
Example #2
0
def refgenie_add(rgc, asset_dict, path, force=False):
    """
    Add an external asset to the config.
    File existence is checked and asset files are transferred to the selected
    tag subdirectory

    :param refgenconf.RefGenConf rgc: genome configuration object
    :param dict asset_dict: a single parsed registry path
    :param str path: the path provided by the user. Must be relative to the
        specific genome directory
    :param bool force: whether the replacement of a possibly existing asset
        should be forced
    :return bool: True if the asset was added, False if the user declined to
        overwrite an existing tag path
    :raise OSError: if the asset path resolved from the provided one does
        not exist
    """
    # imported locally, as before, so the block does not depend on a
    # module-level shutil import
    from shutil import copy2, copytree

    genome = asset_dict["genome"]
    # remove the first directory from the provided path if it is the genome
    # name; a path that consists of the genome name alone is left untouched
    # since there would be nothing left to join
    path_parts = path.split(os.sep)
    if path_parts[0] == genome and len(path_parts) > 1:
        path = os.path.join(*path_parts[1:])
    tag = asset_dict["tag"] or rgc.get_default_tag(genome, asset_dict["asset"])
    outfolder = os.path.abspath(os.path.join(rgc[CFG_FOLDER_KEY], genome))
    abs_asset_path = os.path.join(outfolder, path)
    # validate the source before touching the file system
    if not os.path.exists(abs_asset_path):
        raise OSError(
            "Absolute path '{}' does not exist. "
            "The provided path must be relative to: {}".format(
                abs_asset_path, rgc[CFG_FOLDER_KEY]
            )
        )
    single_file = asset_dict["seek_key"] is not None
    if single_file:
        # if seek_key is specified we're about to copy just a single file to
        # the tag subdir
        tag_path = os.path.join(os.path.dirname(abs_asset_path), tag)
    else:
        # if seek_key is not specified we're about to copy a directory to
        # the tag subdir
        tag_path = os.path.join(abs_asset_path, tag)
    if os.path.exists(tag_path):
        if not force and not query_yes_no(
            "Path '{}' exists. Do you want to overwrite?".format(tag_path)
        ):
            return False
        _remove(tag_path)
    if single_file:
        # (re)create the tag directory so the file is copied *into* it
        # rather than written to a file named after the tag
        if not os.path.isdir(tag_path):
            os.makedirs(tag_path)
        copy2(abs_asset_path, tag_path)
    else:
        copytree(abs_asset_path, tag_path)
    rgc.make_writable()
    gat_bundle = [genome, asset_dict["asset"], tag]
    # for a single file the asset path is the directory that contains it
    td = {
        CFG_ASSET_PATH_KEY: path
        if os.path.isdir(abs_asset_path)
        else os.path.dirname(path)
    }
    rgc.update_tags(*gat_bundle, data=td)
    # seek_key points to the entire dir if not specified
    seek_key_value = os.path.basename(abs_asset_path) if single_file else "."
    sk = {asset_dict["seek_key"] or asset_dict["asset"]: seek_key_value}
    rgc.update_seek_keys(*gat_bundle, keys=sk)
    rgc.set_default_pointer(genome, asset_dict["asset"], tag)
    # a separate update_tags call since we want to use the get_asset method
    # that requires a complete asset entry in rgc
    td = {CFG_ASSET_CHECKSUM_KEY: get_dir_digest(_seek(rgc, *gat_bundle))}
    rgc.update_tags(*gat_bundle, data=td)
    # Write the updated refgenie genome configuration
    rgc.write()
    rgc.make_readonly()
    return True
Example #3
0
def bulker_init(config_path, template_config_path, container_engine=None):
    """
    Initialize a config file.

    The folder holding the template config is copied next to the new config
    file (into the templates subdirectory), the engine-specific template
    paths are filled in and the result is written to ``config_path``.

    :param str config_path: path to bulker configuration file to
        create/initialize
    :param str template_config_path: path to bulker configuration file to
        copy FROM
    :param str container_engine: name of the container engine to record in
        the config; if not provided, the first callable one of "docker" and
        "singularity" is used
    """
    if not config_path:
        _LOGGER.error("You must specify a file path to initialize.")
        return

    if not template_config_path:
        _LOGGER.error("You must specify a template config file path.")
        return

    if not container_engine:
        # it's a priority list, stop at the first found engine
        for engine in ("docker", "singularity"):
            if is_command_callable(engine):
                _LOGGER.info("Guessing container engine is {}.".format(engine))
                container_engine = engine
                break

    # an existing file is replaced only after the user confirms
    if os.path.exists(config_path) and not query_yes_no("Exists. Overwrite?"):
        _LOGGER.warning(
            "Can't initialize, file exists: {} ".format(config_path))
        return

    # Init should *also* write the templates.
    dest_folder = os.path.dirname(config_path)
    dest_templates_dir = os.path.join(dest_folder, TEMPLATE_SUBDIR)
    copy_tree(os.path.dirname(template_config_path), dest_templates_dir)
    bulker_config = yacman.YacAttMap(filepath=template_config_path,
                                     writable=True)
    _LOGGER.debug("Engine used: {}".format(container_engine))
    bulker_config.bulker.container_engine = container_engine

    # (executable, shell, build) template file names for each known engine
    engine_templates = {
        "docker": (DOCKER_EXE_TEMPLATE, DOCKER_SHELL_TEMPLATE,
                   DOCKER_BUILD_TEMPLATE),
        "singularity": (SINGULARITY_EXE_TEMPLATE, SINGULARITY_SHELL_TEMPLATE,
                        SINGULARITY_BUILD_TEMPLATE),
    }
    templates = engine_templates.get(bulker_config.bulker.container_engine)
    if templates is not None:
        exe_template, shell_template, build_template = templates
        bulker_config.bulker.executable_template = os.path.join(
            TEMPLATE_SUBDIR, exe_template)
        bulker_config.bulker.shell_template = os.path.join(
            TEMPLATE_SUBDIR, shell_template)
        bulker_config.bulker.build_template = os.path.join(
            TEMPLATE_SUBDIR, build_template)
    # rc files are recorded regardless of the engine
    bulker_config.bulker.rcfile = os.path.join(TEMPLATE_SUBDIR,
                                               RCFILE_TEMPLATE)
    bulker_config.bulker.rcfile_strict = os.path.join(
        TEMPLATE_SUBDIR, RCFILE_STRICT_TEMPLATE)
    bulker_config.write(config_path)
    _LOGGER.info("Wrote new configuration file: {}".format(config_path))
Example #4
0
def bulker_load(manifest,
                cratevars,
                bcfg,
                exe_jinja2_template,
                shell_jinja2_template,
                crate_path=None,
                build=False,
                force=False):
    """
    Load a manifest as a crate: a folder of command executables.

    The crate is registered in the bulker config under
    namespace/crate/tag, any imported crates are copied in (loading them
    first when they are not present on disk), one executable and one
    "_"-prefixed shell script are rendered per manifest command, callable
    host commands are symlinked, and finally the config is written. The
    process exits with status 1 if the manifest yields no commands at all.

    :param manifest: manifest object; its ``manifest`` attribute may provide
        ``imports``, ``commands`` and ``host_commands``
    :param dict cratevars: parsed registry path of the crate; the
        'namespace', 'crate' and 'tag' keys are used
    :param bcfg: bulker configuration object
    :param exe_jinja2_template: template rendered (with ``pkg``) into each
        command executable
    :param shell_jinja2_template: template rendered (with ``pkg``) into each
        shell script
    :param str crate_path: folder to create the crate in; if not provided it
        is built from the default crate folder, namespace, crate and tag.
        A relative path is resolved against the config file's folder
    :param build: False to skip building, otherwise a template that is
        rendered (with ``pkg``) into a build script and run through the shell
    :param bool force: whether to overwrite an already loaded manifest
        without asking
    """
    manifest_name = cratevars['crate']
    # We store them in folder: namespace/crate/version
    if not crate_path:
        crate_path = os.path.join(bcfg.bulker.default_crate_folder,
                                  cratevars['namespace'], manifest_name,
                                  cratevars['tag'])
    if not os.path.isabs(crate_path):
        crate_path = os.path.join(os.path.dirname(bcfg._file_path), crate_path)

    _LOGGER.debug("Crate path: {}".format(crate_path))
    _LOGGER.debug("cratevars: {}".format(cratevars))
    # Update the config file
    if not bcfg.bulker.crates:
        bcfg.bulker.crates = {}
    if not hasattr(bcfg.bulker.crates, cratevars['namespace']):
        bcfg.bulker.crates[cratevars['namespace']] = yacman.YacAttMap({})
    if not hasattr(bcfg.bulker.crates[cratevars['namespace']],
                   cratevars['crate']):
        bcfg.bulker.crates[cratevars['namespace']][
            cratevars['crate']] = yacman.YacAttMap({})
    if hasattr(bcfg.bulker.crates[cratevars['namespace']][cratevars['crate']],
               cratevars['tag']):
        _LOGGER.debug(bcfg.bulker.crates[cratevars['namespace']][
            cratevars['crate']].to_dict())
        if not (force or query_yes_no(
                "That manifest has already been loaded. Overwrite?")):
            return
        bcfg.bulker.crates[cratevars['namespace']][cratevars['crate']][str(
            cratevars['tag'])] = crate_path
        _LOGGER.warning(
            "Removing all executables in: {}".format(crate_path))
        try:
            shutil.rmtree(crate_path)
        except OSError:
            # best effort: report and carry on, the crate is rebuilt below
            _LOGGER.error(
                "Error removing crate at {}. Did your crate path change? Remove it manually."
                .format(crate_path))
    else:
        bcfg.bulker.crates[cratevars['namespace']][cratevars['crate']][str(
            cratevars['tag'])] = crate_path

    # Now make the crate

    # First add any imports

    mkdir(crate_path, exist_ok=True)
    if hasattr(manifest.manifest, "imports") and manifest.manifest.imports:
        for imp in manifest.manifest.imports:
            imp_cratevars = parse_registry_path(imp)
            imp_crate_path = os.path.join(bcfg.bulker.default_crate_folder,
                                          imp_cratevars['namespace'],
                                          imp_cratevars['crate'],
                                          imp_cratevars['tag'])
            if not os.path.isabs(imp_crate_path):
                imp_crate_path = os.path.join(os.path.dirname(bcfg._file_path),
                                              imp_crate_path)
            if not os.path.exists(imp_crate_path):
                _LOGGER.error("Can't import crate '{}' from '{}'".format(
                    imp, imp_crate_path))
                # Recursively load any non-existent imported crates.
                imp_manifest, imp_cratevars = load_remote_registry_path(
                    bcfg, imp, None)
                _LOGGER.debug(imp_manifest)
                _LOGGER.debug(imp_cratevars)
                bulker_load(imp_manifest,
                            imp_cratevars,
                            bcfg,
                            exe_jinja2_template,
                            shell_jinja2_template,
                            crate_path=None,
                            build=build,
                            force=force)
            _LOGGER.info("Importing crate '{}' from '{}'.".format(
                imp, imp_crate_path))
            copy_tree(imp_crate_path, crate_path)

    def host_tool_specific_args(bcfg, pkg, hosttool_arg_key):
        """
        Look up the value configured under ``hosttool_arg_key`` for the
        image of ``pkg`` in the bulker config's ``tool_args`` section.

        The entry of the image's tag is used when the tag is not 'default'
        and such an entry exists, otherwise the 'default' entry is used.
        An empty string is returned when the lookup fails.
        """
        _LOGGER.debug("Arg key: '{}'".format(hosttool_arg_key))
        # Here we're parsing the *image*, not the crate registry path.
        imvars = parse_registry_path_image(pkg['docker_image'])
        _LOGGER.debug(imvars)
        try:
            amap = bcfg.bulker.tool_args[imvars['namespace']][imvars['image']]
            if imvars['tag'] != 'default' and hasattr(amap, imvars['tag']):
                string = amap[imvars['tag']][hosttool_arg_key]
            else:
                string = amap.default[hosttool_arg_key]
            _LOGGER.debug(string)
            return string
        except Exception:
            # any failed lookup means nothing is configured for this tool;
            # unlike a bare except, this lets KeyboardInterrupt/SystemExit
            # propagate
            _LOGGER.debug("No host/tool args found.")
            return ""

    cmdlist = []
    if hasattr(manifest.manifest, "commands") and manifest.manifest.commands:
        for pkg in manifest.manifest.commands:
            _LOGGER.debug(pkg)
            pkg.update(bcfg.bulker)  # Add terms from the bulker config
            pkg = copy.deepcopy(
                yacman.YacAttMap(pkg))  # (otherwise it's just a dict)
            # We have to deepcopy it so that changes we make to pkg aren't reflected in bcfg.

            if pkg.container_engine == "singularity" and "singularity_image_folder" in pkg:
                pkg["singularity_image"] = os.path.basename(
                    pkg["docker_image"])
                pkg["namespace"] = os.path.dirname(pkg["docker_image"])

                if os.path.isabs(pkg["singularity_image_folder"]):
                    sif = pkg["singularity_image_folder"]
                else:
                    sif = os.path.join(os.path.dirname(bcfg._file_path),
                                       pkg["singularity_image_folder"])

                pkg["singularity_fullpath"] = os.path.join(
                    sif, pkg["namespace"], pkg["singularity_image"])

                mkdir(os.path.dirname(pkg["singularity_fullpath"]),
                      exist_ok=True)
            command = pkg["command"]
            path = os.path.join(crate_path, command)
            _LOGGER.debug("Writing {cmd}".format(cmd=path))
            cmdlist.append(command)

            # Add any host-specific tool-specific args
            hosttool_arg_key = "{engine}_args".format(
                engine=bcfg.bulker.container_engine)
            hts = host_tool_specific_args(bcfg, pkg, hosttool_arg_key)
            _LOGGER.debug("Adding host-tool args: {}".format(hts))
            if hasattr(pkg, hosttool_arg_key):
                pkg[hosttool_arg_key] += " " + hts
            else:
                pkg[hosttool_arg_key] = hts

            # Remove any excluded volumes from the package
            exclude_vols = host_tool_specific_args(bcfg, pkg,
                                                   "exclude_volumes")
            _LOGGER.debug("Volume list: {}".format(pkg["volumes"]))
            _LOGGER.debug("pkg: {}".format(pkg))
            if len(exclude_vols) > 0:
                for item in exclude_vols:
                    _LOGGER.debug("Excluding volume: '{}'".format(item))
                    try:
                        pkg["volumes"].remove(item)
                    except (AttributeError, ValueError):
                        # nothing to exclude: the volume is not listed (or
                        # there is no volume list to remove it from)
                        pass
                _LOGGER.debug("Volume list: {}".format(pkg["volumes"]))
            else:
                _LOGGER.debug("No excluded volumes")

            with open(path, "w") as fh:
                fh.write(exe_jinja2_template.render(pkg=pkg))
                os.chmod(path, 0o755)

            # shell commands
            path_shell = os.path.join(crate_path, "_" + command)
            _LOGGER.debug(
                "Writing shell command: '{cmd}'".format(cmd=path_shell))
            with open(path_shell, "w") as fh:
                fh.write(shell_jinja2_template.render(pkg=pkg))
                os.chmod(path_shell, 0o755)

            if build:
                buildscript = build.render(pkg=pkg)
                x = os.system(buildscript)
                if x != 0:
                    _LOGGER.error(
                        "------ Error building. Build script used: ------")
                    _LOGGER.error(buildscript)
                    _LOGGER.error(
                        "------------------------------------------------")
                # the image path is only set for singularity packages above
                if "singularity_fullpath" in pkg:
                    _LOGGER.info("Container available at: {cmd}".format(
                        cmd=pkg["singularity_fullpath"]))

    # host commands
    host_cmdlist = []
    if hasattr(manifest.manifest,
               "host_commands") and manifest.manifest.host_commands:
        _LOGGER.info("Populating host commands")
        for cmd in manifest.manifest.host_commands:
            _LOGGER.debug(cmd)
            if not is_command_callable(cmd):
                _LOGGER.warning("Requested host command is not callable and "
                                "therefore not created: '{}'".format(cmd))
                continue
            local_exe = find_executable(cmd)
            path = os.path.join(crate_path, cmd)
            host_cmdlist.append(cmd)
            # expose the host's own executable inside the crate
            os.symlink(local_exe, path)

    cmd_count = len(cmdlist)
    host_cmd_count = len(host_cmdlist)
    if cmd_count < 1 and host_cmd_count < 1:
        _LOGGER.error("No commands provided. Crate not created.")
        os.rmdir(crate_path)
        # also drop the parent folder if this crate was its only content
        crate_path_parent = os.path.dirname(crate_path)
        if not os.listdir(crate_path_parent):
            os.rmdir(crate_path_parent)
        sys.exit(1)

    rp = "{namespace}/{crate}:{tag}".format(namespace=cratevars['namespace'],
                                            crate=cratevars['crate'],
                                            tag=cratevars['tag'])

    _LOGGER.info("Loading manifest: '{rp}'."
                 " Activate with 'bulker activate {rp}'.".format(rp=rp))
    if cmd_count > 0:
        _LOGGER.info("Commands available: {}".format(", ".join(cmdlist)))
    if host_cmd_count > 0:
        _LOGGER.info("Host commands available: {}".format(
            ", ".join(host_cmdlist)))

    bcfg.write()
Example #5
0
def main():
    """
    Primary workflow

    Parse the command line, set up the module-level logger, determine the
    genome configuration file and run the branch matching the requested
    command (``args.command``).

    The process is terminated with ``sys.exit`` on usage errors and after
    some short-circuit actions (e.g. printing recipe requirements or setting
    a default tag).

    :raise MissingGenomeConfigError: if no genome config file could be
        determined
    :raise FileNotFoundError: if the JSON settings file passed to the init
        command does not exist
    :raise MissingFolderError: if the data directory used by the pull
        command does not exist
    :raise NotImplementedError: if more than one asset is passed to a
        command that handles a single asset (insert, tag), or if removal of
        a specific seek_key is requested
    """
    parser = logmuse.add_logging_options(build_argparser())
    args, remaining_args = parser.parse_known_args()
    global _LOGGER
    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
    _LOGGER.debug(f"versions: refgenie {__version__} | refgenconf {rgc_version}")
    _LOGGER.debug(f"Args: {args}")

    if not args.command:
        parser.print_help()
        _LOGGER.error("No command given")
        sys.exit(1)

    if args.command == ALIAS_CMD and not args.subcommand:
        parser.print_help()
        _LOGGER.error("No alias subcommand command given")
        sys.exit(1)

    # The config file is allowed to be missing only when it is about to be
    # created by the init command.
    gencfg = select_genome_config(
        filename=args.genome_config,
        check_exist=not args.command == INIT_CMD,
        on_missing=lambda fp: fp,
        strict_env=True,
    )
    if gencfg is None:
        raise MissingGenomeConfigError(args.genome_config)
    _LOGGER.debug("Determined genome config: {}".format(gencfg))

    skip_read_lock = _skip_lock(args.skip_read_lock, gencfg)

    # From user input we want to construct a list of asset dicts, where each
    # asset has a genome name, asset name, and tag
    if "asset_registry_paths" in args and args.asset_registry_paths:
        _LOGGER.debug("Found registry_path: {}".format(args.asset_registry_paths))
        asset_list = [parse_registry_path(x) for x in args.asset_registry_paths]

        for a in asset_list:
            # every asset must have a genome, either provided via registry path
            # or the args.genome arg.
            if not a["genome"]:
                if args.genome:
                    a["genome"] = args.genome
                else:
                    _LOGGER.error(
                        "Provided asset registry path ({}/{}:{}) is invalid. See help for usage reference.".format(
                            a["genome"], a["asset"], a["tag"]
                        )
                    )
                    sys.exit(1)
            else:
                if args.genome and args.genome != a["genome"]:
                    # Logger.warn is a deprecated alias; use warning()
                    _LOGGER.warning(
                        "Two different genomes specified for asset '{}'.".format(
                            a["asset"]
                        )
                    )

    else:
        if args.command in GENOME_ONLY_REQUIRED and not args.genome:
            parser.error("You must provide either a genome or a registry path")
            sys.exit(1)
        if args.command in ASSET_REQUIRED:
            parser.error("You must provide an asset registry path")
            sys.exit(1)

    if args.command == INIT_CMD:
        _LOGGER.debug("Initializing refgenie genome configuration")
        entries = OrderedDict(
            {
                CFG_VERSION_KEY: REQ_CFG_VERSION,
                CFG_FOLDER_KEY: os.path.dirname(os.path.abspath(gencfg)),
                CFG_SERVERS_KEY: args.genome_server or [DEFAULT_SERVER],
                CFG_GENOMES_KEY: None,
            }
        )
        # Settings from the JSON file override the defaults above; explicit
        # command line options below override both.
        if args.settings_json:
            if os.path.isfile(args.settings_json):
                with open(args.settings_json, "r") as json_file:
                    data = json.load(json_file)
                entries.update(data)
            else:
                raise FileNotFoundError(
                    "JSON file with config init settings does not exist: {}".format(
                        args.settings_json
                    )
                )
        if args.genome_folder:
            entries.update({CFG_FOLDER_KEY: args.genome_folder})
        if args.remote_url_base:
            entries.update({CFG_REMOTE_URL_BASE_KEY: args.remote_url_base})
        if args.genome_archive_folder:
            entries.update({CFG_ARCHIVE_KEY: args.genome_archive_folder})
        if args.genome_archive_config:
            entries.update({CFG_ARCHIVE_CONFIG_KEY: args.genome_archive_config})
        _LOGGER.debug("initializing with entries: {}".format(entries))
        rgc = RefGenConf(entries=entries, skip_read_lock=skip_read_lock)
        rgc.initialize_config_file(os.path.abspath(gencfg))

    elif args.command == BUILD_CMD:
        if not all(x["genome"] == asset_list[0]["genome"] for x in asset_list):
            _LOGGER.error("Build can only build assets for one genome")
            sys.exit(1)
        recipe_name = None
        if args.recipe:
            if len(asset_list) > 1:
                _LOGGER.error("Recipes cannot be specified for multi-asset builds")
                sys.exit(1)
            recipe_name = args.recipe
        if args.requirements:
            # only report the requirements of each recipe, do not build
            for a in asset_list:
                recipe = recipe_name or a["asset"]
                if recipe not in asset_build_packages.keys():
                    _raise_missing_recipe_error(recipe)
                _LOGGER.info("'{}' recipe requirements: ".format(recipe))
                _make_asset_build_reqs(recipe)
            sys.exit(0)
        refgenie_build(gencfg, asset_list[0]["genome"], asset_list, recipe_name, args)

    elif args.command == GET_ASSET_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        check = args.check_exists if args.check_exists else None
        for a in asset_list:
            _LOGGER.debug(
                "getting asset: '{}/{}.{}:{}'".format(
                    a["genome"], a["asset"], a["seek_key"], a["tag"]
                )
            )
            print(
                rgc.seek(
                    a["genome"],
                    a["asset"],
                    a["tag"],
                    a["seek_key"],
                    strict_exists=check,
                )
            )
        return

    elif args.command == INSERT_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)

        if len(asset_list) > 1:
            raise NotImplementedError("Can only add 1 asset at a time")
        else:
            # seek keys are passed on the command line as a JSON string
            sk = args.seek_keys
            if sk:
                sk = json.loads(args.seek_keys)
            rgc.add(
                path=args.path,
                genome=asset_list[0]["genome"],
                asset=asset_list[0]["asset"],
                tag=asset_list[0]["tag"],
                seek_keys=sk,
                force=args.force,
            )

    elif args.command == PULL_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)

        # existing assets overwriting
        if args.no_overwrite:
            force = False
        elif args.force_overwrite:
            force = True
        else:
            force = None
        # large archive pulling
        if args.no_large:
            force_large = False
        elif args.pull_large:
            force_large = True
        else:
            force_large = None
        # batch mode takes precedence over other choices
        if args.batch:
            force_large = True
            force = False

        outdir = rgc.data_dir
        if not os.path.exists(outdir):
            raise MissingFolderError(outdir)
        if not perm_check_x(outdir):
            return
        if not _single_folder_writeable(outdir):
            _LOGGER.error("Insufficient permissions to write to: {}".format(outdir))
            return

        for a in asset_list:
            rgc.pull(
                a["genome"],
                a["asset"],
                a["tag"],
                force=force,
                force_large=force_large,
                size_cutoff=args.size_cutoff,
            )

    elif args.command in [LIST_LOCAL_CMD, LIST_REMOTE_CMD]:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        console = Console()
        if args.command == LIST_REMOTE_CMD:
            num_servers = 0
            bad_servers = []
            for server_url in rgc[CFG_SERVERS_KEY]:
                num_servers += 1
                try:
                    table = rgc.get_asset_table(
                        genomes=args.genome, server_url=server_url
                    )
                except (DownloadJsonError, ConnectionError, MissingSchema):
                    # remember the failing server and try the next one
                    bad_servers.append(server_url)
                    continue
                else:
                    console.print(table)
            if num_servers >= len(rgc[CFG_SERVERS_KEY]) and bad_servers:
                _LOGGER.error(
                    "Could not list assets from the following servers: {}".format(
                        bad_servers
                    )
                )
        else:
            if args.recipes:
                print(", ".join(sorted(list(asset_build_packages.keys()))))
            else:
                console.print(rgc.get_asset_table(genomes=args.genome))

    elif args.command == GETSEQ_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        print(rgc.getseq(args.genome, args.locus))

    elif args.command == REMOVE_CMD:
        force = args.force
        rgc = RefGenConf(filepath=gencfg, skip_read_lock=skip_read_lock)
        for a in asset_list:
            a["tag"] = a["tag"] or rgc.get_default_tag(
                a["genome"], a["asset"], use_existing=False
            )
            _LOGGER.debug("Determined tag for removal: {}".format(a["tag"]))
            if a["seek_key"] is not None:
                raise NotImplementedError("You can't remove a specific seek_key.")
            gat = {"genome": a["genome"], "asset": a["asset"], "tag": a["tag"]}
            try:
                if not rgc.is_asset_complete(**gat):
                    with rgc as r:
                        r.cfg_remove_assets(**gat)
                    # The message uses named fields, so the dict has to be
                    # unpacked as keyword arguments (**gat); positional
                    # unpacking (*gat) would raise a KeyError here.
                    _LOGGER.info(
                        "Removed an incomplete asset "
                        "'{genome}/{asset}:{tag}'".format(**gat)
                    )
                    return
            except (KeyError, MissingAssetError, MissingGenomeError):
                _LOGGER.info(
                    "Asset '{genome}/{asset}:{tag}' does not exist".format(**gat)
                )
                return
        if len(asset_list) > 1:
            # a single confirmation covers the whole batch, so the individual
            # removals below are forced
            if not query_yes_no(
                "Are you sure you want to remove {} assets?".format(len(asset_list))
            ):
                _LOGGER.info("Action aborted by the user")
                return
            force = True
        for a in asset_list:
            rgc.remove(genome=a["genome"], asset=a["asset"], tag=a["tag"], force=force)

    elif args.command == TAG_CMD:
        rgc = RefGenConf(filepath=gencfg, skip_read_lock=skip_read_lock)
        if len(asset_list) > 1:
            raise NotImplementedError("Can only tag 1 asset at a time")
        # Bind the single asset explicitly rather than relying on the loop
        # variable left over from the registry path validation above.
        a = asset_list[0]
        if args.default:
            # set the default tag and exit
            with rgc as r:
                r.set_default_pointer(a["genome"], a["asset"], a["tag"], True)
            sys.exit(0)
        rgc.tag(a["genome"], a["asset"], a["tag"], args.tag, force=args.force)

    elif args.command == ID_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        if len(asset_list) == 1:
            # a single asset: print the bare identifier
            g, a = asset_list[0]["genome"], asset_list[0]["asset"]
            t = asset_list[0]["tag"] or rgc.get_default_tag(g, a)
            print(rgc.id(g, a, t))
            return
        # multiple assets: prefix each identifier with its registry path
        for asset in asset_list:
            g, a = asset["genome"], asset["asset"]
            t = asset["tag"] or rgc.get_default_tag(g, a)
            print("{}/{}:{},".format(g, a, t) + rgc.id(g, a, t))
        return
    elif args.command == SUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        rgc.subscribe(urls=args.genome_server, reset=args.reset)
        return
    elif args.command == UNSUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        rgc.unsubscribe(urls=args.genome_server)
        return
    elif args.command == ALIAS_CMD:
        rgc = RefGenConf(filepath=gencfg, skip_read_lock=skip_read_lock)
        if args.subcommand == ALIAS_GET_CMD:
            if args.aliases is not None:
                for a in args.aliases:
                    print(rgc.get_genome_alias_digest(alias=a))
                return
            # no aliases requested: print the table of genome aliases
            console = Console()
            console.print(rgc.genome_aliases_table)

        elif args.subcommand == ALIAS_SET_CMD:
            rgc.set_genome_alias(
                digest=args.digest,
                genome=args.aliases,
                reset_digest=args.reset,
                create_genome=args.force,
            )
            return
        elif args.subcommand == ALIAS_REMOVE_CMD:
            rgc.remove_genome_aliases(digest=args.digest, aliases=args.aliases)
            return

    elif args.command == COMPARE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False, skip_read_lock=skip_read_lock)
        res = rgc.compare(
            args.genome1[0], args.genome2[0], explain=not args.no_explanation
        )
        # the raw result is printed only when the explanation is disabled
        if args.no_explanation:
            print(res)

    elif args.command == UPGRADE_CMD:
        upgrade_config(
            target_version=args.target_version, filepath=gencfg, force=args.force
        )
Example #6
0
def test_query_yesno_input(question, default, response, expected):
    """ A single user response is translated into the expected answer. """
    # Replace the input-reading callable so it hands back the canned response.
    input_patch = mock.patch(READ_INPUT_PATH, return_value=response)
    with input_patch:
        answer = query_yes_no(question, default)
        assert answer is expected
Example #7
0
def test_response_sequence(question, default, responses, expected):
    """ Successive responses are consumed until the expected answer is given. """
    # side_effect feeds one element of 'responses' per call of the patched
    # input reader.
    input_patch = mock.patch(READ_INPUT_PATH, side_effect=responses)
    with input_patch:
        answer = query_yes_no(question, default)
        assert expected is answer
Example #8
0
def test_query_yesno_empty_with_default(question, default, expected, capsys):
    """ An empty user response falls back to the default answer. """
    # The patched input reader always returns an empty string.
    empty_input = mock.patch(READ_INPUT_PATH, return_value="")
    with empty_input:
        answer = query_yes_no(question, default)
        assert expected is answer
Example #9
0
def test_illegal_default_yields_value_error(question, default):
    """ A ValueError is raised when the default response is not legal. """
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        query_yes_no(question, default)