# Imports shared by the `runcmd` variants below (payu's cli and fsops modules)
import os

from payu import cli, fsops


# 'payu collate' submission: older variant using collate_* keys in the top-level config
def runcmd(model_type, config_path, init_run, n_runs, lab_path, dir_path):

    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path, dir_path)

    collate_queue = pbs_config.get('collate_queue', 'copyq')
    pbs_config['queue'] = collate_queue

    n_cpus_request = pbs_config.get('collate_ncpus', 1)
    pbs_config['ncpus'] = n_cpus_request

    # Modify jobname
    if 'jobname' in pbs_config:
        pbs_config['jobname'] = pbs_config['jobname'][:13] + '_c'
    else:
        if not dir_path:
            dpath = os.path.basename(os.getcwd())
        else:
            dpath = dir_path
        # Use the directory's basename so the jobname contains no path separators
        pbs_config['jobname'] = os.path.basename(os.path.normpath(dpath))[:13] + '_c'

    # Replace (or remove) walltime
    collate_walltime = pbs_config.get('collate_walltime')
    if collate_walltime:
        pbs_config['walltime'] = collate_walltime
    else:
        # Remove the model walltime if set
        try:
            pbs_config.pop('walltime')
        except KeyError:
            pass

    # Replace (or remove) memory request
    collate_mem = pbs_config.get('collate_mem')
    if collate_mem:
        pbs_config['mem'] = collate_mem
    else:
        # Remove the model memory request if set
        try:
            pbs_config.pop('mem')
        except KeyError:
            pass

    # Disable hyperthreading
    qsub_flags = []
    for flag in pbs_config.get('qsub_flags', '').split():
        if 'hyperthread' not in flag:
            qsub_flags.append(flag)
    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-collate', pbs_config, pbs_vars)
# 'payu profile' submission: fsops-based variant
def runcmd(model_type, config_path, init_run, n_runs, lab_path):

    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run=init_run, n_runs=n_runs,
                                lab_path=lab_path)

    pbs_config['queue'] = pbs_config.get('profile_queue', 'normal')

    # Profiling jobs are (currently) serial
    pbs_config['ncpus'] = 1

    # Modify jobname
    pbs_config['jobname'] = pbs_config['jobname'][:13] + '_p'

    # Replace (or remove) walltime
    profile_walltime = pbs_config.get('profile_walltime')
    if profile_walltime:
        pbs_config['walltime'] = profile_walltime
    else:
        # Remove the model walltime if set
        try:
            pbs_config.pop('walltime')
        except KeyError:
            pass

    # Replace (or remove) memory request
    profile_mem = pbs_config.get('profile_mem')
    if profile_mem:
        pbs_config['mem'] = profile_mem
    else:
        # Remove the model memory request if set
        try:
            pbs_config.pop('mem')
        except KeyError:
            pass

    # Disable hyperthreading
    qsub_flags = []
    for flag in pbs_config.get('qsub_flags', '').split():
        if 'hyperthread' not in flag:
            qsub_flags.append(flag)
    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-profile', pbs_config, pbs_vars)
# 'payu profile' submission: older cli-based variant
def runcmd(model_type, config_path, init_run, n_runs, lab_path):

    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    pbs_config['queue'] = pbs_config.get('profile_queue', 'normal')

    # Profiling jobs are (currently) serial
    pbs_config['ncpus'] = 1

    # Modify jobname
    pbs_config['jobname'] = pbs_config['jobname'][:13] + '_p'

    # Replace (or remove) walltime
    profile_walltime = pbs_config.get('profile_walltime')
    if profile_walltime:
        pbs_config['walltime'] = profile_walltime
    else:
        # Remove the model walltime if set
        try:
            pbs_config.pop('walltime')
        except KeyError:
            pass

    # Replace (or remove) memory request
    profile_mem = pbs_config.get('profile_mem')
    if profile_mem:
        pbs_config['mem'] = profile_mem
    else:
        # Remove the model memory request if set
        try:
            pbs_config.pop('mem')
        except KeyError:
            pass

    # Disable hyperthreading
    qsub_flags = []
    for flag in pbs_config.get('qsub_flags', '').split():
        if 'hyperthread' not in flag:
            qsub_flags.append(flag)
    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-profile', pbs_config, pbs_vars)
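# The try/except KeyError around dict.pop() in the variants above can be
# written more compactly with pop's default argument. A minimal equivalent
# sketch (toy config values; `apply_override` is a hypothetical helper, not
# part of payu):

def apply_override(config, key, override_key):
    """Replace config[key] with the override value if set, else remove it."""
    override = config.get(override_key)
    if override:
        config[key] = override
    else:
        config.pop(key, None)  # a default value means pop never raises KeyError


config = {'walltime': '10:00:00', 'profile_walltime': '1:00:00'}
apply_override(config, 'walltime', 'profile_walltime')
assert config['walltime'] == '1:00:00'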
# 'payu run' submission: platform-aware variant
def runcmd(model_type, config_path, init_run, n_runs, lab_path, reproduce):

    # Get job submission configuration
    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    # Set the queue
    # NOTE: Maybe force all jobs on the normal queue
    if 'queue' not in pbs_config:
        pbs_config['queue'] = 'normal'

    # TODO: Create drivers for servers
    platform = pbs_config.get('platform', {})
    max_cpus_per_node = platform.get('nodesize', 16)
    max_ram_per_node = platform.get('nodemem', 32)

    # Adjust the CPUs for any model-specific settings
    # TODO: Incorporate this into the Model driver
    mask_table = pbs_config.get('mask_table', False)
    if mask_table:
        # Check if a mask table exists
        # TODO: Is control_path defined at this stage?
        mask_table_fname = None
        for fname in os.listdir(os.curdir):
            if fname.startswith('mask_table'):
                mask_table_fname = fname
        # TODO TODO

    if 'ncpureq' in pbs_config:
        # Hard override of CPU request
        n_cpus_request = pbs_config.get('ncpureq')
    elif 'submodels' in pbs_config and 'ncpus' not in pbs_config:
        # Sum the CPU requests over the submodels
        n_cpus_request = 0
        submodel_configs = pbs_config['submodels']
        for model_config in submodel_configs:
            n_cpus_request += model_config.get('ncpus', 0)
    else:
        n_cpus_request = pbs_config.get('ncpus', 1)

    n_cpus = n_cpus_request
    n_cpus_per_node = pbs_config.get('npernode', max_cpus_per_node)

    assert n_cpus_per_node <= max_cpus_per_node

    node_misalignment = n_cpus % max_cpus_per_node != 0
    node_increase = n_cpus_per_node < max_cpus_per_node

    # Increase the CPUs to accommodate the cpu-per-node request
    if n_cpus > max_cpus_per_node and (node_increase or node_misalignment):

        # Number of requested nodes
        n_nodes = 1 + (n_cpus - 1) // n_cpus_per_node
        n_cpu_request = max_cpus_per_node * n_nodes
        n_inert_cpus = n_cpu_request - n_cpus

        print('payu: warning: Job request includes {n} unused CPUs.'
              ''.format(n=n_inert_cpus))

        # Increase CPU request to match the effective node request
        n_cpus = max_cpus_per_node * n_nodes

    # Update the ncpus field in the config
    if n_cpus != n_cpus_request:
        print('payu: warning: CPU request increased from {n_req} to {n}'
              ''.format(n_req=n_cpus_request, n=n_cpus))

    # Update the (possibly unchanged) value of ncpus
    pbs_config['ncpus'] = n_cpus

    # Set memory to use the complete node if unspecified
    pbs_mem = pbs_config.get('mem')
    if not pbs_mem:
        if n_cpus > max_cpus_per_node:
            pbs_mem = (n_cpus // max_cpus_per_node) * max_ram_per_node
        else:
            pbs_mem = n_cpus * (max_ram_per_node // max_cpus_per_node)

        pbs_config['mem'] = '{0}GB'.format(pbs_mem)

    cli.submit_job('payu-run', pbs_config, pbs_vars)
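# The node-rounding arithmetic above is easiest to check with concrete
# numbers. A minimal standalone sketch (hypothetical helper name; the 16-CPU
# node size mirrors the `platform` fallback above, not any particular machine):

def round_to_whole_nodes(n_cpus, n_cpus_per_node=16, max_cpus_per_node=16):
    """Return the effective CPU request after padding out to whole nodes."""
    n_nodes = 1 + (n_cpus - 1) // n_cpus_per_node
    return max_cpus_per_node * n_nodes


# e.g. 240 ranks packed 12 per node spill onto 20 nodes, so the scheduler is
# asked for 20 * 16 = 320 CPUs, 80 of which sit idle:
assert round_to_whole_nodes(240, n_cpus_per_node=12) == 320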
# 'payu collate' submission: `collate:` config-block variant (no n_runs)
def runcmd(model_type, config_path, init_run, lab_path, dir_path):

    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run=init_run, lab_path=lab_path,
                                dir_path=dir_path)

    collate_config = pbs_config.get('collate', {})

    # The mpi flag implies using mppnccombine-fast
    mpi = collate_config.get('mpi', False)

    default_ncpus = 1
    default_queue = 'copyq'
    if mpi:
        default_ncpus = 2
        default_queue = 'express'

    collate_queue = collate_config.get('queue', default_queue)
    pbs_config['queue'] = collate_queue

    n_cpus_request = collate_config.get('ncpus', default_ncpus)
    pbs_config['ncpus'] = n_cpus_request

    collate_jobname = collate_config.get('jobname')
    if not collate_jobname:
        pbs_jobname = pbs_config.get('jobname')
        if not pbs_jobname:
            if dir_path and os.path.isdir(dir_path):
                pbs_jobname = os.path.basename(dir_path)
            else:
                pbs_jobname = os.path.basename(os.getcwd())

        collate_jobname = pbs_jobname[:13] + '_c'

    # NOTE: Better to construct `collate_config` to pass to `submit_job`
    pbs_config['jobname'] = collate_jobname[:15]

    # Replace (or remove) walltime
    collate_walltime = collate_config.get('walltime')
    if collate_walltime:
        pbs_config['walltime'] = collate_walltime
    else:
        # Remove the model walltime if set
        try:
            pbs_config.pop('walltime')
        except KeyError:
            pass

    # TODO: Calculate the default memory request based on ncpus and platform
    pbs_config['mem'] = collate_config.get('mem', '2GB')

    # Disable hyperthreading
    qsub_flags = []
    iflags = iter(pbs_config.get('qsub_flags', '').split())
    for flag in iflags:
        # Keep '-l' and its value together so paired flags are filtered as one
        if flag == '-l':
            try:
                flag += ' ' + next(iflags)
            except StopIteration:
                break

        # TODO: Test the sequence, not just existence of characters in string
        if 'hyperthread' not in flag:
            qsub_flags.append(flag)

    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-collate', pbs_config, pbs_vars)
# 'payu run' submission: older variant with hard-coded node size
def runcmd(model_type, config_path, init_run, n_runs, lab_path):

    # Get job submission configuration
    pbs_config = cli.get_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path)

    # Set the queue
    # NOTE: Maybe force all jobs on the normal queue
    if 'queue' not in pbs_config:
        pbs_config['queue'] = 'normal'

    # TODO: Create drivers for servers
    max_cpus_per_node = 16

    # Adjust the CPUs for any model-specific settings
    # TODO: Incorporate this into the Model driver
    mask_table = pbs_config.get('mask_table', False)
    if mask_table:
        # Check if a mask table exists
        # TODO: Is control_path defined at this stage?
        mask_table_fname = None
        for f in os.listdir(os.curdir):
            if f.startswith('mask_table'):
                mask_table_fname = f
        # TODO TODO

    # Increase the cpu request to match a complete node
    if 'submodels' in pbs_config and 'ncpus' not in pbs_config:
        submodel_config = pbs_config['submodels']
        n_cpus_request = 0
        for model in submodel_config:
            n_cpus_request += submodel_config[model].get('ncpus', 0)
    else:
        n_cpus_request = pbs_config.get('ncpus', 1)

    n_cpus = n_cpus_request
    n_cpus_per_node = pbs_config.get('npernode', max_cpus_per_node)

    assert n_cpus_per_node <= max_cpus_per_node

    node_misalignment = n_cpus % max_cpus_per_node != 0
    node_increase = n_cpus_per_node < max_cpus_per_node

    # Increase the CPUs to accommodate the cpu-per-node request
    if n_cpus > max_cpus_per_node and (node_increase or node_misalignment):

        # Number of requested nodes
        n_nodes = 1 + (n_cpus - 1) // n_cpus_per_node
        n_cpu_request = max_cpus_per_node * n_nodes
        n_inert_cpus = n_cpu_request - n_cpus

        print('payu: warning: Job request includes {} unused CPUs.'
              ''.format(n_inert_cpus))

        # Increase CPU request to match the effective node request
        n_cpus = max_cpus_per_node * n_nodes

    # Update the ncpus field in the config
    if n_cpus != n_cpus_request:
        print('payu: warning: CPU request increased from {} to {}'
              ''.format(n_cpus_request, n_cpus))

    # Update the (possibly unchanged) value of ncpus
    pbs_config['ncpus'] = n_cpus

    # Set memory to use the complete node if unspecified
    # TODO: Move RAM per node into a variable
    pbs_mem = pbs_config.get('mem')
    if not pbs_mem and n_cpus > max_cpus_per_node:
        pbs_config['mem'] = '{}GB'.format((n_cpus // max_cpus_per_node) * 31)

    cli.submit_job('payu-run', pbs_config, pbs_vars)
# 'payu collate' submission: `collate:` config-block variant (with n_runs)
def runcmd(model_type, config_path, init_run, n_runs, lab_path, dir_path):

    pbs_config = fsops.read_config(config_path)
    pbs_vars = cli.set_env_vars(init_run, n_runs, lab_path, dir_path)

    collate_config = pbs_config.get('collate', {})

    # The mpi flag implies using mppnccombine-fast
    mpi = collate_config.get('mpi', False)

    default_ncpus = 1
    default_queue = 'copyq'
    if mpi:
        default_ncpus = 2
        default_queue = 'express'

    collate_queue = collate_config.get('queue', default_queue)
    pbs_config['queue'] = collate_queue

    n_cpus_request = collate_config.get('ncpus', default_ncpus)
    pbs_config['ncpus'] = n_cpus_request

    collate_jobname = collate_config.get('jobname')
    if not collate_jobname:
        pbs_jobname = pbs_config.get('jobname')
        if not pbs_jobname:
            if dir_path and os.path.isdir(dir_path):
                pbs_jobname = os.path.basename(dir_path)
            else:
                pbs_jobname = os.path.basename(os.getcwd())

        collate_jobname = pbs_jobname[:13] + '_c'

    # NOTE: Better to construct `collate_config` to pass to `submit_job`
    pbs_config['jobname'] = collate_jobname[:15]

    # Replace (or remove) walltime
    collate_walltime = collate_config.get('walltime')
    if collate_walltime:
        pbs_config['walltime'] = collate_walltime
    else:
        # Remove the model walltime if set
        try:
            pbs_config.pop('walltime')
        except KeyError:
            pass

    # TODO: Calculate the default memory request based on ncpus and platform
    pbs_config['mem'] = collate_config.get('mem', '2GB')

    # Disable hyperthreading
    qsub_flags = []
    iflags = iter(pbs_config.get('qsub_flags', '').split())
    for flag in iflags:
        # Keep '-l' and its value together so paired flags are filtered as one
        if flag == '-l':
            try:
                flag += ' ' + next(iflags)
            except StopIteration:
                break

        # TODO: Test the sequence, not just existence of characters in string
        if 'hyperthread' not in flag:
            qsub_flags.append(flag)

    pbs_config['qsub_flags'] = ' '.join(qsub_flags)

    cli.submit_job('payu-collate', pbs_config, pbs_vars)
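# The '-l'-aware filter above is the subtle part of the collate variants: PBS
# resource flags arrive as pairs ('-l ncpus=4'), so an option and its value
# must be kept or dropped together. A minimal standalone sketch of the same
# logic (hypothetical helper name, not part of payu):

def strip_hyperthread_flags(qsub_flags):
    """Drop any qsub flag, or '-l <value>' pair, that mentions hyperthreading."""
    kept = []
    iflags = iter(qsub_flags.split())
    for flag in iflags:
        if flag == '-l':
            try:
                flag += ' ' + next(iflags)
            except StopIteration:
                break
        if 'hyperthread' not in flag:
            kept.append(flag)
    return ' '.join(kept)


assert (strip_hyperthread_flags('-l other=hyperthread -l ncpus=4 -W depend')
        == '-l ncpus=4 -W depend')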