import os

# NOTE: assumed import location for the get_config/set_env_vars/submit_job
# helpers used throughout this file.
from payu import cli


def runcmd(model_type, config_path, init_run, n_runs, lab_path, dir_path):
    """Submit a `payu-collate` job."""
    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path, dir_path)

    pbs_config['queue'] = pbs_config.get('collate_queue', 'copyq')
    pbs_config['ncpus'] = pbs_config.get('collate_ncpus', 1)

    # Modify the jobname, appending a collation suffix
    if 'jobname' in pbs_config:
        pbs_config['jobname'] = pbs_config['jobname'][:13] + '_c'
    else:
        if not dir_path:
            dpath = os.path.basename(os.getcwd())
        else:
            dpath = dir_path
        pbs_config['jobname'] = os.path.normpath(dpath[:13]) + '_c'

    # Replace (or remove) the model walltime
    collate_walltime = pbs_config.get('collate_walltime')
    if collate_walltime:
        pbs_config['walltime'] = collate_walltime
    else:
        pbs_config.pop('walltime', None)

    # Replace (or remove) the model memory request
    collate_mem = pbs_config.get('collate_mem')
    if collate_mem:
        pbs_config['mem'] = collate_mem
    else:
        pbs_config.pop('mem', None)

    # Disable hyperthreading: drop any qsub flag that requests it
    qsub_flags = [flag for flag in pbs_config.get('qsub_flags', '').split()
                  if 'hyperthread' not in flag]
    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-collate', pbs_config, pbs_vars)
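# The collate_* keys above act as per-job overrides of the model run's PBS
# resources: when present they replace the corresponding request, and when
# absent the model's walltime/mem are dropped so scheduler defaults apply.
# A minimal, self-contained sketch of that pattern; the config dict and the
# helper name below are illustrative, not a real payu configuration.
def _collate_override_demo():
    pbs_config = {
        'walltime': '3:00:00',
        'mem': '128GB',
        'collate_walltime': '1:00:00',  # overrides the model walltime
        # no 'collate_mem': the model 'mem' is removed entirely
    }
    for key in ('walltime', 'mem'):
        override = pbs_config.get('collate_' + key)
        if override:
            pbs_config[key] = override
        else:
            pbs_config.pop(key, None)
    assert pbs_config['walltime'] == '1:00:00'
    assert 'mem' not in pbs_config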
def runcmd(model_type, config_path, init_run, n_runs, lab_path):
    """Submit a `payu-profile` job."""
    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    pbs_config['queue'] = pbs_config.get('profile_queue', 'normal')

    # Profiling jobs are (currently) serial
    pbs_config['ncpus'] = 1

    # Modify the jobname, appending a profiling suffix
    pbs_config['jobname'] = pbs_config['jobname'][:13] + '_p'

    # Replace (or remove) the model walltime
    profile_walltime = pbs_config.get('profile_walltime')
    if profile_walltime:
        pbs_config['walltime'] = profile_walltime
    else:
        pbs_config.pop('walltime', None)

    # Replace (or remove) the model memory request
    profile_mem = pbs_config.get('profile_mem')
    if profile_mem:
        pbs_config['mem'] = profile_mem
    else:
        pbs_config.pop('mem', None)

    # Disable hyperthreading: drop any qsub flag that requests it
    qsub_flags = [flag for flag in pbs_config.get('qsub_flags', '').split()
                  if 'hyperthread' not in flag]
    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-profile', pbs_config, pbs_vars)
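# Both the collate and profile commands strip hyperthreading with the same
# whitespace-split filter: any token containing the substring 'hyperthread'
# is discarded. A small sketch; the flag string is illustrative only.
def _hyperthread_filter_demo():
    flags = '-W umask=027 -lother=hyperthread'
    kept = ' '.join(f for f in flags.split() if 'hyperthread' not in f)
    assert kept == '-W umask=027'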
def runcmd(model_type, config_path, init_run, n_runs, lab_path):
    """Submit a `payu-run` job."""
    # Get job submission configuration
    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    # Set the queue
    # NOTE: Maybe force all jobs onto the normal queue
    if 'queue' not in pbs_config:
        pbs_config['queue'] = 'normal'

    # TODO: Create drivers for servers
    platform = pbs_config.get('platform', {})
    max_cpus_per_node = platform.get('nodesize', 16)
    max_ram_per_node = platform.get('nodemem', 32)

    # Adjust the CPUs for any model-specific settings
    # TODO: Incorporate this into the Model driver
    mask_table = pbs_config.get('mask_table', False)
    if mask_table:
        # Check if a mask table exists
        # TODO: Is control_path defined at this stage?
        mask_table_fname = None
        for fname in os.listdir(os.curdir):
            if fname.startswith('mask_table'):
                mask_table_fname = fname
        # TODO TODO

    if 'ncpureq' in pbs_config:
        # Hard override of the CPU request
        n_cpus_request = pbs_config.get('ncpureq')
    elif 'submodels' in pbs_config and 'ncpus' not in pbs_config:
        # Sum the submodel CPU requests into a single request
        submodel_config = pbs_config['submodels']
        n_cpus_request = 0
        for model in submodel_config:
            n_cpus_request += submodel_config[model].get('ncpus', 0)
    else:
        n_cpus_request = pbs_config.get('ncpus', 1)

    n_cpus = n_cpus_request
    n_cpus_per_node = pbs_config.get('npernode', max_cpus_per_node)
    assert n_cpus_per_node <= max_cpus_per_node

    node_misalignment = n_cpus % max_cpus_per_node != 0
    node_increase = n_cpus_per_node < max_cpus_per_node

    # Increase the CPUs to accommodate the cpu-per-node request
    if n_cpus > max_cpus_per_node and (node_increase or node_misalignment):
        # Number of requested nodes
        n_nodes = 1 + (n_cpus - 1) // n_cpus_per_node
        n_cpu_request = max_cpus_per_node * n_nodes
        n_inert_cpus = n_cpu_request - n_cpus

        print('payu: warning: Job request includes {} unused CPUs.'
              ''.format(n_inert_cpus))

        # Increase the CPU request to match the effective node request
        n_cpus = max_cpus_per_node * n_nodes

    # Update the ncpus field in the config
    if n_cpus != n_cpus_request:
        print('payu: warning: CPU request increased from {} to {}'
              ''.format(n_cpus_request, n_cpus))

    # Update the (possibly unchanged) value of ncpus
    pbs_config['ncpus'] = n_cpus

    # Set memory to use the complete node if unspecified
    pbs_mem = pbs_config.get('mem')
    if not pbs_mem:
        if n_cpus > max_cpus_per_node:
            pbs_mem = (n_cpus // max_cpus_per_node) * max_ram_per_node
        else:
            pbs_mem = n_cpus * (max_ram_per_node // max_cpus_per_node)

        pbs_config['mem'] = '{}GB'.format(pbs_mem)

    cli.submit_job('payu-run', pbs_config, pbs_vars)
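# A standalone sketch of the node-rounding arithmetic above, using the
# function's default platform values (nodesize=16 CPUs, nodemem=32 GB).
# The helper name and example numbers are illustrative, not part of payu.
def _round_to_nodes(n_cpus, n_cpus_per_node,
                    max_cpus_per_node=16, max_ram_per_node=32):
    """Return (ncpus, mem_gb) after padding the request to whole nodes."""
    misaligned = n_cpus % max_cpus_per_node != 0
    underfilled = n_cpus_per_node < max_cpus_per_node
    if n_cpus > max_cpus_per_node and (misaligned or underfilled):
        n_nodes = 1 + (n_cpus - 1) // n_cpus_per_node
        n_cpus = max_cpus_per_node * n_nodes
    if n_cpus > max_cpus_per_node:
        mem_gb = (n_cpus // max_cpus_per_node) * max_ram_per_node
    else:
        mem_gb = n_cpus * (max_ram_per_node // max_cpus_per_node)
    return n_cpus, mem_gb

# 40 CPUs spread 12 per node span 4 nodes, so the request is padded to
# 4 * 16 = 64 CPUs (24 of them unused) and 4 * 32 = 128 GB of memory.
assert _round_to_nodes(40, 12) == (64, 128)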