Example #1
0
def test_group_job_resources_with_pipe(mocker):
    import copy
    from snakemake.executors import RealExecutor

    spy = mocker.spy(GroupResources, "basic_layered")

    # Cluster jobs normally get submitted with cores=all, but for testing purposes,
    # the system may not actually have enough cores to run the snakemake subprocess
    # (e.g. gh actions has only 2 cores). So cheat by patching over get_job_args
    get_job_args = copy.copy(RealExecutor.get_job_args)
    RealExecutor.get_job_args = lambda *args, **kwargs: get_job_args(
        *args, **{**kwargs, "cores": 6}
    )
    run(
        dpath("test_group_with_pipe"),
        cluster="./qsub",
        cores=6,
        resources={
            "mem_mb": 60000,
        },
        group_components={0: 5},
        default_resources=DefaultResources(["mem_mb=0"]),
    )
    assert dict(spy.spy_return) == {
        "_nodes": 1,
        "_cores": 6,
        "runtime": 240,
        "tmpdir": "/tmp",
        "mem_mb": 50000,
        "disk_mb": 1000,
    }
Example #2
0
def test_default_resources():
    from snakemake.resources import DefaultResources
    run(dpath("test_default_resources"),
        verbose=True,
        default_resources=DefaultResources([
            "mem_mb=max(2*input.size, 1000)", "disk_mb=max(2*input.size, 1000)"
        ]))
Example #3
0
def test_string_resources():
    from snakemake.resources import DefaultResources

    run(
        dpath("test_string_resources"),
        default_resources=DefaultResources(["gpu_model='nvidia-tesla-1000'"]),
        cluster="./qsub.py",
    )
Example #4
0
def test_github_issue1550():
    from snakemake.resources import DefaultResources

    run(
        dpath("test_github_issue1550"),
        resources={"mem_mb": 4000},
        default_resources=DefaultResources(
            ["mem_mb=max(2*input.size, 1000)", "disk_mb=max(2*input.size, 1000)"]
        ),
    )
Example #5
0
def test_default_resources():
    from snakemake.resources import DefaultResources

    run(
        dpath("test_default_resources"),
        # use fractional defaults here to test whether they are correctly rounded
        default_resources=DefaultResources(
            ["mem_mb=max(2*input.size, 1000.1)", "disk_mb=max(2*input.size, 1000.2)"]
        ),
    )
Example #6
0
def test_group_job_resources_with_pipe_with_too_much_constraint():
    run(
        dpath("test_group_with_pipe"),
        cluster="./qsub",
        cores=6,
        resources={
            "mem_mb": 20000,
        },
        group_components={0: 5},
        shouldfail=True,
        default_resources=DefaultResources(["mem_mb=0"]),
    )
Example #7
0
def test_excluded_resources_not_submitted_to_cluster(mocker):
    from snakemake.executors import AbstractExecutor

    spy = mocker.spy(AbstractExecutor, "get_resource_declarations")
    run(
        dpath("test_group_jobs_resources"),
        cluster="./qsub",
        cores=6,
        resources={"mem_mb": 60000},
        max_threads=1,
        overwrite_resource_scopes={"fake_res": "excluded"},
        group_components={0: 5},
        default_resources=DefaultResources(["mem_mb=0"]),
    )
    assert set(spy.spy_return.split()[1:]) == {
        "'mem_mb=60000'",
        "'global_res=3000'",
        "'disk_mb=3000'",
    }
Example #8
0
def test_group_jobs_resources(mocker):
    spy = mocker.spy(GroupResources, "basic_layered")
    run(
        dpath("test_group_jobs_resources"),
        cluster="./qsub",
        cores=6,
        resources={"typo": 23, "mem_mb": 60000},
        group_components={0: 5},
        default_resources=DefaultResources(["mem_mb=0"]),
    )
    assert dict(spy.spy_return) == {
        "_nodes": 1,
        "_cores": 6,
        "runtime": 420,
        "tmpdir": "/tmp",
        "mem_mb": 60000,
        "fake_res": 600,
        "global_res": 2000,
        "disk_mb": 2000,
    }
Example #9
0
def test_resources_can_be_overwritten_as_global():
    # Test only works if both mem_mb and global_res are overwritten as local
    tmp = run(
        dpath("test_group_jobs_resources"),
        cluster="./qsub",
        cluster_status="./status_failed",
        cores=6,
        nodes=5,
        cleanup=False,
        resources={"typo": 23, "fake_res": 200},
        overwrite_resource_scopes={"fake_res": {"local"}},
        group_components={0: 5, 1: 5},
        overwrite_groups={"a": 0, "a_1": 1, "b": 2, "c": 2},
        default_resources=DefaultResources(["mem_mb=0"]),
        shouldfail=True,
    )
    with (Path(tmp) / "qsub.log").open("r") as f:
        lines = [l for l in f.readlines() if not l == "\n"]
    assert len(lines) == 1
    shutil.rmtree(tmp)
Example #10
0
def test_global_resource_limits_limit_scheduling_of_groups():
    # Note that in the snakefile, mem_mb is set as global (breaking the default) and
    # fake_res is set as local
    tmp = run(
        dpath("test_group_jobs_resources"),
        cluster="./qsub",
        cluster_status="./status_failed",
        cores=6,
        nodes=5,
        cleanup=False,
        resources={"typo": 23, "mem_mb": 50000, "fake_res": 200},
        group_components={0: 5, 1: 5},
        overwrite_groups={"a": 0, "a_1": 1, "b": 2, "c": 2},
        default_resources=DefaultResources(["mem_mb=0"]),
        shouldfail=True,
    )
    with (Path(tmp) / "qsub.log").open("r") as f:
        lines = [l for l in f.readlines() if not l == "\n"]
    assert len(lines) == 1
    shutil.rmtree(tmp)
Example #11
0
def test_group_jobs_resources_with_limited_resources(mocker):
    spy = mocker.spy(GroupResources, "basic_layered")
    run(
        dpath("test_group_jobs_resources"),
        cluster="./qsub",
        cores=5,
        resources={"mem_mb": 10000},
        max_threads=1,
        group_components={0: 5},
        default_resources=DefaultResources(["mem_mb=0"]),
    )
    assert dict(spy.spy_return) == {
        "_nodes": 1,
        "_cores": 1,
        "runtime": 700,
        "tmpdir": "/tmp",
        "mem_mb": 10000,
        "fake_res": 400,
        "global_res": 1000,
        "disk_mb": 1000,
    }
Example #12
0
    def __init__(
        self,
        snakefile=None,
        jobscript=None,
        overwrite_shellcmd=None,
        overwrite_config=dict(),
        overwrite_workdir=None,
        overwrite_configfiles=None,
        overwrite_clusterconfig=dict(),
        overwrite_threads=dict(),
        config_args=None,
        debug=False,
        verbose=False,
        use_conda=False,
        conda_prefix=None,
        use_singularity=False,
        use_env_modules=False,
        singularity_prefix=None,
        singularity_args="",
        shadow_prefix=None,
        mode=Mode.default,
        wrapper_prefix=None,
        printshellcmds=False,
        restart_times=None,
        attempt=1,
        default_remote_provider=None,
        default_remote_prefix="",
        run_local=True,
        default_resources=None,
        cache=None,
        nodes=1,
        cores=1,
        resources=None,
        conda_cleanup_pkgs=False,
    ):
        """
        Create the controller.
        """

        self.global_resources = dict() if resources is None else resources
        self.global_resources["_cores"] = cores
        self.global_resources["_nodes"] = nodes

        self._rules = OrderedDict()
        self.first_rule = None
        self._workdir = None
        self.overwrite_workdir = overwrite_workdir
        self.workdir_init = os.path.abspath(os.curdir)
        self._ruleorder = Ruleorder()
        self._localrules = set()
        self.linemaps = dict()
        self.rule_count = 0
        self.basedir = os.path.dirname(snakefile)
        self.snakefile = os.path.abspath(snakefile)
        self.included = []
        self.included_stack = []
        self.jobscript = jobscript
        self.persistence = None
        self.globals = globals()
        self._subworkflows = dict()
        self.overwrite_shellcmd = overwrite_shellcmd
        self.overwrite_config = overwrite_config
        self.overwrite_configfiles = overwrite_configfiles
        self.overwrite_clusterconfig = overwrite_clusterconfig
        self.overwrite_threads = overwrite_threads
        self.config_args = config_args
        self.immediate_submit = None
        self._onsuccess = lambda log: None
        self._onerror = lambda log: None
        self._onstart = lambda log: None
        self._wildcard_constraints = dict()
        self.debug = debug
        self.verbose = verbose
        self._rulecount = 0
        self.use_conda = use_conda
        self.conda_prefix = conda_prefix
        self.use_singularity = use_singularity
        self.use_env_modules = use_env_modules
        self.singularity_prefix = singularity_prefix
        self.singularity_args = singularity_args
        self.shadow_prefix = shadow_prefix
        self.global_container_img = None
        self.mode = mode
        self.wrapper_prefix = wrapper_prefix
        self.printshellcmds = printshellcmds
        self.restart_times = restart_times
        self.attempt = attempt
        self.default_remote_provider = default_remote_provider
        self.default_remote_prefix = default_remote_prefix
        self.configfiles = []
        self.run_local = run_local
        self.report_text = None
        self.conda_cleanup_pkgs = conda_cleanup_pkgs
        # environment variables to pass to jobs
        # These are defined via the "envvars:" syntax in the Snakefile itself
        self.envvars = set()

        self.enable_cache = False
        if cache is not None:
            self.enable_cache = True
            self.cache_rules = set(cache)
            if self.default_remote_provider is not None:
                self.output_file_cache = RemoteOutputFileCache(
                    self.default_remote_provider)
            else:
                self.output_file_cache = LocalOutputFileCache()
        else:
            self.output_file_cache = None
            self.cache_rules = set()

        if default_resources is not None:
            self.default_resources = default_resources
        else:
            # only _cores and _nodes
            self.default_resources = DefaultResources()

        self.iocache = snakemake.io.IOCache()

        global config
        config = copy.deepcopy(self.overwrite_config)

        global cluster_config
        cluster_config = copy.deepcopy(self.overwrite_clusterconfig)

        global rules
        rules = Rules()
        global checkpoints
        checkpoints = Checkpoints()
Example #13
0
    def _generate_job_resources(self, job):
        """
        Given a particular job, generate the resources that it needs,
        including default regions and the virtual machine configuration
        """
        # Right now, do a best effort mapping of resources to instance types
        cores = job.resources.get("_cores", 1)
        mem_mb = job.resources.get("mem_mb", 15360)

        # IOPS performance proportional to disk size
        disk_mb = job.resources.get("disk_mb", 512000)

        # Convert mb to gb
        disk_gb = math.ceil(disk_mb / 1024)

        # Look for if the user wants an nvidia gpu
        gpu_count = job.resources.get("nvidia_gpu") or job.resources.get("gpu")
        gpu_model = job.resources.get("gpu_model")

        # If a gpu model is specified without a count, we assume 1
        if gpu_model and not gpu_count:
            gpu_count = 1

        # Update default resources using decided memory and disk
        # TODO why is this needed??
        self.default_resources = DefaultResources(
            from_other=self.workflow.default_resources)
        self.default_resources.set_resource("mem_mb", mem_mb)
        self.default_resources.set_resource("disk_mb", disk_mb)

        # Job resource specification can be overridden by gpu preferences
        self.machine_type_prefix = job.resources.get("machine_type")

        # If gpu wanted, limit to N1 general family, and update arguments
        if gpu_count:
            self._add_gpu(gpu_count)

        machine_types = self.get_available_machine_types()

        # Alert the user of machine_types available before filtering
        # https://cloud.google.com/compute/docs/machine-types
        logger.debug(
            "found {} machine types across regions {} before filtering "
            "to increase selection, define fewer regions".format(
                len(machine_types), self.regions))

        # First pass - eliminate anything that too low in cpu/memory
        keepers = dict()

        # Also keep track of max cpus and memory, in case none available
        max_cpu = 1
        max_mem = 15360

        for name, machine_type in machine_types.items():
            max_cpu = max(max_cpu, machine_type["guestCpus"])
            max_mem = max(max_mem, machine_type["memoryMb"])
            if machine_type["guestCpus"] < cores or machine_type[
                    "memoryMb"] < mem_mb:
                continue
            keepers[name] = machine_type

        # If a prefix is set, filter down to it
        if self.machine_type_prefix:
            machine_types = keepers
            keepers = dict()
            for name, machine_type in machine_types.items():
                if name.startswith(self.machine_type_prefix):
                    keepers[name] = machine_type

        # If we don't have any contenders, workflow error
        if not keepers:
            if self.machine_type_prefix:
                raise WorkflowError(
                    "Machine prefix {prefix} is too strict, or the resources cannot "
                    " be satisfied, so there are no options "
                    "available.".format(prefix=self.machine_type_prefix))
            else:
                raise WorkflowError(
                    "You requested {requestMemory} MB memory, {requestCpu} cores. "
                    "The maximum available are {availableMemory} MB memory and "
                    "{availableCpu} cores. These resources cannot be satisfied. "
                    "Please consider reducing the resource requirements of the "
                    "corresponding rule.".format(
                        requestMemory=mem_mb,
                        requestCpu=cores,
                        availableCpu=max_cpu,
                        availableMemory=max_mem,
                    ))

        # Now find (quasi) minimal to satisfy constraints
        machine_types = keepers

        # Select the first as the "smallest"
        smallest = list(machine_types.keys())[0]
        min_cores = machine_types[smallest]["guestCpus"]
        min_mem = machine_types[smallest]["memoryMb"]

        for name, machine_type in machine_types.items():
            if (machine_type["guestCpus"] < min_cores
                    and machine_type["memoryMb"] < min_mem):
                smallest = name
                min_cores = machine_type["guestCpus"]
                min_mem = machine_type["memoryMb"]

        selected = machine_types[smallest]
        logger.debug("Selected machine type {}:{}".format(
            smallest, selected["description"]))

        if job.is_group():
            preemptible = all(rule in self.preemptible_rules
                              for rule in job.rules)
            if not preemptible and any(rule in self.preemptible_rules
                                       for rule in job.rules):
                raise WorkflowError(
                    "All grouped rules should be homogenously set as preemptible rules"
                    "(see Defining groups for execution in snakemake documentation)"
                )
        else:
            preemptible = job.rule.name in self.preemptible_rules
        # We add the size for the image itself (10 GB) to bootDiskSizeGb
        virtual_machine = {
            "machineType": smallest,
            "labels": {
                "app": "snakemake"
            },
            "bootDiskSizeGb": disk_gb + 10,
            "preemptible": preemptible,
        }

        # If the user wants gpus, add accelerators here
        if gpu_count:
            accelerator = self._get_accelerator(gpu_count,
                                                zone=selected["zone"],
                                                gpu_model=gpu_model)
            virtual_machine["accelerators"] = [{
                "type": accelerator["name"],
                "count": gpu_count
            }]

        resources = {
            "regions": self.regions,
            "virtualMachine": virtual_machine
        }
        return resources
Example #14
0
    def __init__(
        self,
        snakefile=None,
        jobscript=None,
        overwrite_shellcmd=None,
        overwrite_config=dict(),
        overwrite_workdir=None,
        overwrite_configfiles=None,
        overwrite_clusterconfig=dict(),
        config_args=None,
        debug=False,
        use_conda=False,
        conda_prefix=None,
        use_singularity=False,
        singularity_prefix=None,
        singularity_args="",
        shadow_prefix=None,
        mode=Mode.default,
        wrapper_prefix=None,
        printshellcmds=False,
        restart_times=None,
        attempt=1,
        default_remote_provider=None,
        default_remote_prefix="",
        run_local=True,
        default_resources=None,
    ):
        """
        Create the controller.
        """
        self._rules = OrderedDict()
        self.first_rule = None
        self._workdir = None
        self.overwrite_workdir = overwrite_workdir
        self.workdir_init = os.path.abspath(os.curdir)
        self._ruleorder = Ruleorder()
        self._localrules = set()
        self.linemaps = dict()
        self.rule_count = 0
        self.basedir = os.path.dirname(snakefile)
        self.snakefile = os.path.abspath(snakefile)
        self.included = []
        self.included_stack = []
        self.jobscript = jobscript
        self.persistence = None
        self.global_resources = None
        self.globals = globals()
        self._subworkflows = dict()
        self.overwrite_shellcmd = overwrite_shellcmd
        self.overwrite_config = overwrite_config
        self.overwrite_configfiles = overwrite_configfiles
        self.overwrite_clusterconfig = overwrite_clusterconfig
        self.config_args = config_args
        self.immediate_submit = None
        self._onsuccess = lambda log: None
        self._onerror = lambda log: None
        self._onstart = lambda log: None
        self._wildcard_constraints = dict()
        self.debug = debug
        self._rulecount = 0
        self.use_conda = use_conda
        self.conda_prefix = conda_prefix
        self.use_singularity = use_singularity
        self.singularity_prefix = singularity_prefix
        self.singularity_args = singularity_args
        self.shadow_prefix = shadow_prefix
        self.global_singularity_img = None
        self.mode = mode
        self.wrapper_prefix = wrapper_prefix
        self.printshellcmds = printshellcmds
        self.restart_times = restart_times
        self.attempt = attempt
        self.default_remote_provider = default_remote_provider
        self.default_remote_prefix = default_remote_prefix
        self.configfiles = []
        self.run_local = run_local
        self.report_text = None
        if default_resources is not None:
            self.default_resources = default_resources
        else:
            # only _cores and _nodes
            self.default_resources = DefaultResources()

        self.iocache = snakemake.io.IOCache()

        global config
        config = copy.deepcopy(self.overwrite_config)

        global cluster_config
        cluster_config = copy.deepcopy(self.overwrite_clusterconfig)

        global rules
        rules = Rules()
        global checkpoints
        checkpoints = Checkpoints()