def cpu_and_memory(programs, items):
    """Retrieve CPU and memory/core specified in configuration input.

    Returns a (cores_per_job, memory_per_core) tuple derived from the
    per-program resource specifications of the given items' configuration.
    """
    assert len(items) > 0, "Finding job resources but no items to process"
    config = items[0]["config"]
    algs = [config_utils.get_algorithm_config(x) for x in items]
    progs = _get_resource_programs(programs, algs)
    # Largest per-program core request, defaulting to a single core.
    core_requests = [config_utils.get_resources(p, config).get("cores", 1)
                     for p in progs]
    cores_per_job = max(core_requests) if core_requests else 1
    # Calculate memory. Use 1Gb memory usage per core as min baseline if not specified
    mem_requests = []
    for p in progs:
        mem = _get_prog_memory(config_utils.get_resources(p, config), cores_per_job)
        if mem:
            mem_requests.append(mem)
    memory_per_core = max(mem_requests) if mem_requests else 1
    return cores_per_job, memory_per_core
def calculate(parallel, items, sysinfo, config, multiplier=1, max_multicore=None):
    """Determine cores and workers to use for this stage based on used programs.

    multiplier specifies the number of regions items will be split into during
    processing.
    max_multicore specifies an optional limit on the maximum cores. Can use to
    force single core processing during specific tasks.
    sysinfo specifies cores and memory on processing nodes, allowing us to
    tailor jobs for available resources.
    """
    assert len(items) > 0, "Finding job resources but no items to process"
    # Provide 100Mb of additional memory for the system
    system_memory = 0.10
    algs = [config_utils.get_algorithm_config(x) for x in items]
    progs = _get_resource_programs(parallel.get("progs", []), algs)
    # Calculate cores: the largest per-program request (default 1), then cap
    # by the optional max_multicore limit and by the cores on a node.
    all_cores = [config_utils.get_resources(prog, config).get("cores", 1)
                 for prog in progs]
    if not all_cores:
        all_cores.append(1)
    cores_per_job = max(all_cores)
    if max_multicore:
        cores_per_job = min(cores_per_job, max_multicore)
    if "cores" in sysinfo:
        cores_per_job = min(cores_per_job, int(sysinfo["cores"]))
    total = parallel["cores"]
    if total > cores_per_job:
        num_jobs = total // cores_per_job
    else:
        num_jobs, cores_per_job = 1, total
    # Calculate memory. Use 1Gb memory usage per core as min baseline if not specified
    all_memory = []
    for prog in progs:
        resources = config_utils.get_resources(prog, config)
        memory = _get_prog_memory(resources, cores_per_job)
        if memory:
            all_memory.append(memory)
    if not all_memory:
        all_memory.append(1)
    memory_per_core = max(all_memory)
    logger.debug("Resource requests: {progs}; memory: {memory}; cores: {cores}".format(
        progs=", ".join(progs), memory=", ".join("%.2f" % x for x in all_memory),
        cores=", ".join(str(x) for x in all_cores)))
    cores_per_job, memory_per_core = _ensure_min_resources(
        progs, cores_per_job, memory_per_core,
        min_memory=parallel.get("ensure_mem", {}))
    if cores_per_job == 1:
        memory_per_job = "%.2f" % memory_per_core
        num_jobs, mem_pct = _scale_jobs_to_memory(num_jobs, memory_per_core, sysinfo)
        # For single core jobs, avoid overscheduling maximum cores_per_job
        num_jobs = min(num_jobs, total)
    else:
        cores_per_job, memory_per_job, mem_pct = _scale_cores_to_memory(
            cores_per_job, memory_per_core, sysinfo, system_memory)
        # For local runs with multiple jobs and multiple cores, potentially scale jobs down
        if num_jobs > 1 and parallel.get("type") == "local":
            memory_per_core = float(memory_per_job) / cores_per_job
            num_jobs, _ = _scale_jobs_to_memory(num_jobs, memory_per_core, sysinfo)
    # do not overschedule if we don't have extra items to process
    num_jobs = int(min(num_jobs, len(items) * multiplier))
    logger.debug("Configuring %d jobs to run, using %d cores each with %sg of "
                 "memory reserved for each job" % (num_jobs, cores_per_job, str(memory_per_job)))
    parallel = copy.deepcopy(parallel)
    parallel["cores_per_job"] = cores_per_job
    parallel["num_jobs"] = num_jobs
    parallel["mem"] = str(memory_per_job)
    parallel["mem_pct"] = "%.2f" % mem_pct
    parallel["system_cores"] = sysinfo.get("cores", 1)
    return parallel
def calculate(parallel, items, sysinfo, config, multiplier=1, max_multicore=None):
    """Determine cores and workers to use for this stage based on used programs.

    multiplier specifies the number of regions items will be split into during
    processing.
    max_multicore specifies an optional limit on the maximum cores. Can use to
    force single core processing during specific tasks.
    sysinfo specifies cores and memory on processing nodes, allowing us to
    tailor jobs for available resources.
    """
    assert len(items) > 0, "Finding job resources but no items to process"
    all_cores = []
    all_memory = []
    # Provide 100Mb of additional memory for the system
    system_memory = 0.10
    algs = [config_utils.get_algorithm_config(x) for x in items]
    progs = _get_resource_programs(parallel.get("progs", []), algs)
    # Calculate cores: the largest per-program request (default 1), then cap
    # by the optional max_multicore limit and by the cores on a node.
    for prog in progs:
        resources = config_utils.get_resources(prog, config)
        all_cores.append(resources.get("cores", 1))
    if len(all_cores) == 0:
        all_cores.append(1)
    cores_per_job = max(all_cores)
    if max_multicore:
        cores_per_job = min(cores_per_job, max_multicore)
    if "cores" in sysinfo:
        cores_per_job = min(cores_per_job, int(sysinfo["cores"]))
    total = parallel["cores"]
    if total > cores_per_job:
        num_jobs = total // cores_per_job
    else:
        num_jobs, cores_per_job = 1, total
    # Calculate memory. Use 1Gb memory usage per core as min baseline if not specified
    for prog in progs:
        resources = config_utils.get_resources(prog, config)
        memory = _get_prog_memory(resources, cores_per_job)
        if memory:
            all_memory.append(memory)
    if len(all_memory) == 0:
        all_memory.append(1)
    memory_per_core = max(all_memory)
    logger.debug("Resource requests: {progs}; memory: {memory}; cores: {cores}".format(
        progs=", ".join(progs), memory=", ".join("%.2f" % x for x in all_memory),
        cores=", ".join(str(x) for x in all_cores)))
    cores_per_job, memory_per_core = _ensure_min_resources(
        progs, cores_per_job, memory_per_core,
        min_memory=parallel.get("ensure_mem", {}))
    if cores_per_job == 1:
        memory_per_job = "%.2f" % memory_per_core
        num_jobs, mem_pct = _scale_jobs_to_memory(num_jobs, memory_per_core, sysinfo)
        # For single core jobs, avoid overscheduling beyond the total
        # available cores (fix: this cap was missing in this variant).
        num_jobs = min(num_jobs, total)
    else:
        cores_per_job, memory_per_job, mem_pct = _scale_cores_to_memory(
            cores_per_job, memory_per_core, sysinfo, system_memory)
        # For local runs with multiple jobs and multiple cores, potentially scale jobs down
        if num_jobs > 1 and parallel.get("type") == "local":
            memory_per_core = float(memory_per_job) / cores_per_job
            num_jobs, _ = _scale_jobs_to_memory(num_jobs, memory_per_core, sysinfo)
    # do not overschedule if we don't have extra items to process;
    # coerce to int since multiplier may be a float (fix: previously left a
    # non-integer num_jobs when multiplier was fractional).
    num_jobs = int(min(num_jobs, len(items) * multiplier))
    logger.debug("Configuring %d jobs to run, using %d cores each with %sg of "
                 "memory reserved for each job" % (num_jobs, cores_per_job, str(memory_per_job)))
    parallel = copy.deepcopy(parallel)
    parallel["cores_per_job"] = cores_per_job
    parallel["num_jobs"] = num_jobs
    parallel["mem"] = str(memory_per_job)
    parallel["mem_pct"] = "%.2f" % mem_pct
    parallel["system_cores"] = sysinfo.get("cores", 1)
    return parallel