Ejemplo n.º 1
0
 def make_link(self, filepath):
     """
     Payu integration function for staging a manifest entry in a work
     directory.

     The original file, ``self.fullpath(filepath)``, is copied to
     ``filepath`` when ``self.copy_file(filepath)`` is true and symlinked
     there otherwise.  If the original no longer exists, the entry is
     removed from the manifest (when present) and ``self.needsync`` is
     set.

     Any exception raised while copying or linking is reported on stdout
     and then re-raised unchanged.
     """
     source = self.fullpath(filepath)

     # Check file exists. It may have been deleted but still in manifest
     if not os.path.exists(source):
         print('File not found: {filepath}'.format(filepath=source))
         if self.contains(filepath):
             print('removing from manifest')
             self.delete(filepath)
             self.needsync = True
         return

     # Updated once the copy/link decision is known, so that the error
     # report names the operation that was actually attempted
     action = 'linking'
     try:
         if self.copy_file(filepath):
             action = 'copying'

         # Make destination directory if not already exists
         # Necessary because sometimes this is called before
         # individual model setup.  A bare file name has an empty dirname,
         # which os.makedirs rejects, so nothing is created in that case.
         destdir = os.path.dirname(filepath)
         if destdir and not os.path.exists(destdir):
             os.makedirs(destdir)

         if action == 'copying':
             shutil.copy(source, filepath)
             # Copies are readable by everyone and writable by the owner
             perm = (stat.S_IRUSR | stat.S_IRGRP
                     | stat.S_IROTH | stat.S_IWUSR)
             os.chmod(filepath, perm)
         else:
             make_symlink(source, filepath)
     except Exception:
         print('payu: error: {action} orig: {orig} '
               'local: {local}'.format(action=action,
                                       orig=source,
                                       local=filepath))
         raise
Ejemplo n.º 2
0
 def make_link(self, filepath):
     """
     Payu integration function for creating symlinks in work directories
     which point back to the original file.

     The original file is ``self.fullpath(filepath)``.  It is copied to
     ``filepath`` when ``self.copy_file(filepath)`` is true and symlinked
     otherwise.  A missing original is reported and, if the manifest
     contains the entry, removed from it with ``self.needsync`` set.

     Any exception raised while copying or linking is reported on stdout
     and then re-raised unchanged.
     """
     source = self.fullpath(filepath)

     # Check file exists. It may have been deleted but still in manifest
     if not os.path.exists(source):
         print('File not found: {filepath}'.format(filepath=source))
         if self.contains(filepath):
             print('removing from manifest')
             self.delete(filepath)
             self.needsync = True
         return

     # Records which operation was chosen so the error report is accurate
     action = 'linking'
     try:
         if self.copy_file(filepath):
             action = 'copying'
             shutil.copy(source, filepath)
         else:
             make_symlink(source, filepath)
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit pass through without a misleading error report
         print('payu: error: {action} orig: {orig} '
               'local: {local}'.format(action=action,
                                       orig=source,
                                       local=filepath))
         raise
Ejemplo n.º 3
0
 def make_link(self, filepath):
     """
     Payu integration function for populating a work directory from the
     manifest.

     ``self.fullpath(filepath)`` gives the original file.  Depending on
     ``self.copy_file(filepath)`` it is either copied to ``filepath`` or
     symlinked there.  When the original is missing, the entry is dropped
     from the manifest (if it is present) and ``self.needsync`` is set.

     Any exception raised while copying or linking is printed as a payu
     error and then re-raised unchanged.
     """
     orig_path = self.fullpath(filepath)

     # Check file exists. It may have been deleted but still in manifest
     if not os.path.exists(orig_path):
         print('File not found: {filepath}'.format(filepath=orig_path))
         if self.contains(filepath):
             print('removing from manifest')
             self.delete(filepath)
             self.needsync = True
         return

     # Set to 'copying' only once copy_file() has said so; the previous
     # code tested the bound method itself, which is always truthy
     action = 'linking'
     try:
         if self.copy_file(filepath):
             action = 'copying'

         # Make destination directory if not already exists
         # Necessary because sometimes this is called before
         # individual model setup.  An empty dirname (bare file name)
         # is skipped because os.makedirs('') raises.
         destdir = os.path.dirname(filepath)
         if destdir and not os.path.exists(destdir):
             os.makedirs(destdir)

         if action == 'copying':
             shutil.copy(orig_path, filepath)
             # Readable by user, group and others; writable by user only
             perm = (stat.S_IRUSR | stat.S_IRGRP
                     | stat.S_IROTH | stat.S_IWUSR)
             os.chmod(filepath, perm)
         else:
             make_symlink(orig_path, filepath)
     except Exception:
         print('payu: error: {action} orig: {orig} '
               'local: {local}'.format(action=action,
                                       orig=orig_path,
                                       local=filepath))
         raise
Ejemplo n.º 4
0
    def setup(self):
        """
        Run the parent setup, then share the OASIS files with other models.

        Every name in ``self.config_files``, plus every other entry found
        in ``self.work_path``, is symlinked into the ``work_path`` of each
        model in ``self.expt.models``.  This model itself and any model
        without a ``work_path`` attribute are skipped.
        """
        super(Oasis, self).setup()

        # TODO: Parse namcouple to determine filelist
        # TODO: Let users map files to models
        extra_files = []
        for entry in os.listdir(self.work_path):
            if entry not in self.config_files:
                extra_files.append(entry)

        for submodel in self.expt.models:
            # Skip the oasis self-reference and any model that has no
            # work_path (like access)
            if submodel == self or not hasattr(submodel, 'work_path'):
                continue

            target_dir = submodel.work_path
            mkdir_p(target_dir)
            for name in self.config_files + extra_files:
                make_symlink(os.path.join(self.work_path, name),
                             os.path.join(target_dir, name))

        if self.expt.runtime:
            # TODO: Implement runtime patch to namcouple
            pass
Ejemplo n.º 5
0
    def setup(self, force_archive=False):
        """
        Build the work directory and set up every model and profiler.

        Exits via ``sys.exit`` if ``self.output_path`` already exists.
        Otherwise the work directory is created and symlinked,
        ``config.yaml`` is copied into it, the manifests and models are set
        up, the manifests are checked and copied into the work directory,
        the optional ``setup`` userscript is run and the configured
        profilers are instantiated and set up.

        Args:
            force_archive: when true, also create ``self.archive_path`` and
                symlink it at ``self.archive_sym_path``.
        """
        # Confirm that no output path already exists
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        mkdir_p(self.work_path)

        if force_archive:
            mkdir_p(self.archive_path)
            make_symlink(self.archive_path, self.archive_sym_path)

        # Archive the payu config
        # TODO: This just copies the existing config.yaml file, but we should
        #       reconstruct a new file including default values
        config_src = os.path.join(self.control_path, 'config.yaml')
        shutil.copy(config_src, self.work_path)

        # Stripe directory in Lustre
        # TODO: Make this more configurable
        if self.config.get('stripedio', False):
            # Pass an argument list rather than re-splitting a formatted
            # string, so a work path containing whitespace or quotes is
            # handed to lfs as a single argument
            cmd = ['lfs', 'setstripe', '-c', '8', '-s', '8m',
                   str(self.work_path)]
            sp.check_call(cmd)

        make_symlink(self.work_path, self.work_sym_path)

        # Set up all file manifests
        self.manifest.setup()

        for model in self.models:
            model.setup()

        # Call the macro-model setup
        if len(self.models) > 1:
            self.model.setup()

        self.manifest.check_manifests()

        # Copy manifests to work directory so they are archived on completion
        manifest_path = os.path.join(self.work_path, 'manifests')
        self.manifest.copy_manifests(manifest_path)

        setup_script = self.userscripts.get('setup')
        if setup_script:
            self.run_userscript(setup_script)

        # Profiler setup: the config value may be a single name or a list
        expt_profs = self.config.get('profilers', [])
        if not isinstance(expt_profs, list):
            expt_profs = [expt_profs]

        for prof_name in expt_profs:
            ProfType = payu.profilers.index[prof_name]
            prof = ProfType(self)
            self.profilers.append(prof)

            # Testing
            prof.setup()
Ejemplo n.º 6
0
    def setup(self):
        """
        Set up OASIS and link its files into each other model's work path.

        After the parent setup, the configuration files and all remaining
        entries of ``self.work_path`` are symlinked into the ``work_path``
        of every model in ``self.expt.models`` other than this one.  Models
        lacking a ``work_path`` attribute are ignored.
        """
        super(Oasis, self).setup()

        # Copy OASIS data to the other submodels

        # TODO: Parse namcouple to determine filelist
        # TODO: Let users map files to models
        data_files = []
        for item in os.listdir(self.work_path):
            if item in self.config_files:
                continue
            data_files.append(item)

        for other in self.expt.models:

            if other == self:
                # The oasis self-reference needs no links
                continue

            if not hasattr(other, 'work_path'):
                # Models without a work_path (like access)
                continue

            mkdir_p(other.work_path)
            shared = self.config_files + data_files
            for shared_name in shared:
                origin = os.path.join(self.work_path, shared_name)
                link = os.path.join(other.work_path, shared_name)
                make_symlink(origin, link)

        if self.expt.runtime:
            # TODO: Implement runtime patch to namcouple
            pass
Ejemplo n.º 7
0
    def setup(self, force_archive=False):
        """
        Build the work directory and set up every model and profiler.

        Exits via ``sys.exit`` if ``self.output_path`` already exists.
        Otherwise the work directory is created and symlinked,
        ``config.yaml`` is copied into it, the manifests and models are set
        up, the manifests are used to populate the work directory and are
        then copied into it, the optional ``setup`` userscript is run and
        the configured profilers are instantiated and set up.

        Args:
            force_archive: when true, also create ``self.archive_path`` and
                symlink it at ``self.archive_sym_path``.
        """
        # Confirm that no output path already exists
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        mkdir_p(self.work_path)

        if force_archive:
            mkdir_p(self.archive_path)
            make_symlink(self.archive_path, self.archive_sym_path)

        # Archive the payu config
        # TODO: This just copies the existing config.yaml file, but we should
        #       reconstruct a new file including default values
        config_src = os.path.join(self.control_path, 'config.yaml')
        shutil.copy(config_src, self.work_path)

        # Stripe directory in Lustre
        # TODO: Make this more configurable
        if self.config.get('stripedio', False):
            # Pass an argument list rather than re-splitting a formatted
            # string, so a work path containing whitespace or quotes is
            # handed to lfs as a single argument
            cmd = ['lfs', 'setstripe', '-c', '8', '-s', '8m',
                   str(self.work_path)]
            sp.check_call(cmd)

        make_symlink(self.work_path, self.work_sym_path)

        # Set up all file manifests
        self.manifest.setup()

        for model in self.models:
            model.setup()

        # Call the macro-model setup
        if len(self.models) > 1:
            self.model.setup()

        # Use manifest to populate work directory
        self.manifest.make_links()

        # Copy manifests to work directory so they are archived on completion
        manifest_path = os.path.join(self.work_path, 'manifests')
        self.manifest.copy_manifests(manifest_path)

        setup_script = self.userscripts.get('setup')
        if setup_script:
            self.run_userscript(setup_script)

        # Profiler setup: the config value may be a single name or a list
        expt_profs = self.config.get('profilers', [])
        if not isinstance(expt_profs, list):
            expt_profs = [expt_profs]

        for prof_name in expt_profs:
            ProfType = payu.profilers.index[prof_name]
            prof = ProfType(self)
            self.profilers.append(prof)

            # Testing
            prof.setup()
Ejemplo n.º 8
0
    def archive(self):
        """
        Move the finished work directory into the archive and prune restarts.

        Creates the archive and restart directories, removes the work
        symlink, lets every model archive itself, moves ``self.work_path``
        to ``self.output_path`` and deletes outdated ``restart*``
        directories from ``self.archive_path``.  Finally the optional
        collate and profile commands and the ``archive`` userscript are run.

        Exits via ``sys.exit`` if ``self.output_path`` already exists.
        """
        mkdir_p(self.archive_path)
        make_symlink(self.archive_path, self.archive_sym_path)

        # Remove work symlink
        if os.path.islink(self.work_sym_path):
            os.remove(self.work_sym_path)

        mkdir_p(self.restart_path)

        for model in self.models:
            model.archive()

        # Postprocess the model suite
        if len(self.models) > 1:
            self.model.archive()

        # Double-check that the run path does not exist
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        # Argument lists (instead of a formatted string passed through
        # shlex.split) keep paths with whitespace or quotes intact
        sp.check_call(['mv', str(self.work_path), str(self.output_path)])

        # Remove old restart files
        # TODO: Move to subroutine
        restart_freq = self.config.get('restart_freq', default_restart_freq)
        restart_history = self.config.get('restart_history',
                                          default_restart_history)

        # Remove any outdated restart files
        prefix = 'restart'
        for res_dir in os.listdir(self.archive_path):
            if not res_dir.startswith(prefix):
                continue

            # Slice the prefix off: str.lstrip() would strip a *set* of
            # characters rather than the literal prefix.  Entries whose
            # remainder is not a number are not numbered restart
            # directories and are left alone.
            try:
                res_idx = int(res_dir[len(prefix):])
            except ValueError:
                continue

            if (self.repeat_run
                    or (res_idx % restart_freq != 0
                        and res_idx <= (self.counter - restart_history))):

                res_path = os.path.join(self.archive_path, res_dir)

                # Only delete real directories; ignore symbolic restart links
                if os.path.isdir(res_path):
                    shutil.rmtree(res_path)

        if self.config.get('collate', True):
            sp.check_call(['payu', 'collate', '-i', str(self.counter)])

        if self.config.get('hpctoolkit', False):
            sp.check_call(['payu', 'profile', '-i', str(self.counter)])

        archive_script = self.userscripts.get('archive')
        if archive_script:
            self.run_userscript(archive_script)
Ejemplo n.º 9
0
    def archive(self):
        """
        Move the finished work directory into the archive and prune restarts.

        Creates the archive and restart directories, removes the work
        symlink, lets every model archive itself, moves ``self.work_path``
        to ``self.output_path`` and deletes outdated ``restart*``
        directories from ``self.archive_path``.  Finally the optional
        collate and profile commands and the ``archive`` userscript are run.

        Exits via ``sys.exit`` if ``self.output_path`` already exists.
        """
        mkdir_p(self.archive_path)
        make_symlink(self.archive_path, self.archive_sym_path)

        # Remove work symlink
        if os.path.islink(self.work_sym_path):
            os.remove(self.work_sym_path)

        mkdir_p(self.restart_path)

        for model in self.models:
            model.archive()

        # Postprocess the model suite
        if len(self.models) > 1:
            self.model.archive()

        # Double-check that the run path does not exist
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        # Argument lists (instead of a formatted string passed through
        # shlex.split) keep paths with whitespace or quotes intact
        sp.check_call(['mv', str(self.work_path), str(self.output_path)])

        # Remove old restart files
        # TODO: Move to subroutine
        restart_freq = self.config.get('restart_freq', default_restart_freq)
        restart_history = self.config.get('restart_history',
                                          default_restart_history)

        # Remove any outdated restart files
        prefix = 'restart'
        for res_dir in os.listdir(self.archive_path):
            if not res_dir.startswith(prefix):
                continue

            # Slice the prefix off: str.lstrip() would strip a *set* of
            # characters rather than the literal prefix.  Entries whose
            # remainder is not a number are not numbered restart
            # directories and are left alone.
            try:
                res_idx = int(res_dir[len(prefix):])
            except ValueError:
                continue

            if (res_idx % restart_freq != 0
                    and res_idx <= (self.counter - restart_history)):

                res_path = os.path.join(self.archive_path, res_dir)
                shutil.rmtree(res_path)

        if self.config.get('collate', True):
            sp.check_call(['payu', 'collate', '-i', str(self.counter),
                           '-l', str(self.lab.basepath)])

        if self.config.get('hpctoolkit', False):
            sp.check_call(['payu', 'profile', '-i', str(self.counter)])

        archive_script = self.userscripts.get('archive')
        if archive_script:
            self.run_userscript(archive_script)
Ejemplo n.º 10
0
    def setup(self):
        """
        Populate the model work directory before a run.

        Creates the input, restart and output work directories, copies the
        required and optional configuration files from the control path,
        stages restart files from the prior run (unless this is a repeat
        run), stages the input files and, if a ``timestep`` is configured,
        applies it through ``self.set_timestep``.

        Files are copied or symlinked according to ``self.copy_restarts``
        and ``self.copy_inputs``.
        """
        # Create experiment directory structure
        mkdir_p(self.work_input_path)
        mkdir_p(self.work_restart_path)
        mkdir_p(self.work_output_path)

        # Required configuration files: a missing one is an error
        for cfg_name in self.config_files:
            shutil.copy(os.path.join(self.control_path, cfg_name),
                        self.work_path)

        # Optional configuration files: a missing one is silently skipped
        for cfg_name in self.optional_config_files:
            cfg_path = os.path.join(self.control_path, cfg_name)
            try:
                shutil.copy(cfg_path, self.work_path)
            except IOError as err:
                if err.errno != errno.ENOENT:
                    raise

        # Link restart files from prior run
        if self.prior_restart_path and not self.expt.repeat_run:
            for res_name in self.get_prior_restart_files():
                res_src = os.path.join(self.prior_restart_path, res_name)
                res_dst = os.path.join(self.work_init_path, res_name)
                if not self.copy_restarts:
                    make_symlink(res_src, res_dst)
                else:
                    shutil.copy(res_src, res_dst)

        # Link input data
        for src_dir in self.input_paths:
            for in_name in os.listdir(src_dir):
                in_src = os.path.join(src_dir, in_name)
                in_dst = os.path.join(self.work_input_path, in_name)

                # Do not use input file if it is in RESTART
                if os.path.exists(in_dst):
                    continue

                if not self.copy_inputs:
                    make_symlink(in_src, in_dst)
                else:
                    shutil.copy(in_src, in_dst)

                # Some models overwrite their own input for restarts
                # (e.g. OASIS)
                if self.make_inputs_writeable:
                    writeable = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP
                    os.chmod(in_dst, writeable)

        step = self.config.get('timestep')
        if step:
            self.set_timestep(step)
Ejemplo n.º 11
0
Archivo: mom.py Proyecto: penguian/payu
    def setup(self):
        """
        Set up a MOM run on top of the parent (FMS) setup.

        Creates a ``log`` directory when this is not the top-level model,
        writes the experiment runtime into ``ocean_solo_nml`` of
        ``input.nml``, optionally builds a mask table and optionally links
        a reference mask table as ``ocean_mask_table`` in the work input
        directory.
        """
        # FMS initialisation
        super(Mom, self).setup()

        if not self.top_level_model:
            # Make log dir
            log_dir = os.path.join(self.work_path, 'log')
            mkdir_p(log_dir)

        nml_path = os.path.join(self.work_path, 'input.nml')
        nml = f90nml.read(nml_path)

        # Set the runtime
        if self.expt.runtime:
            solo_group = nml['ocean_solo_nml']

            for unit in ('years', 'months', 'days'):
                solo_group[unit] = self.expt.runtime[unit]
            # Seconds are optional and default to zero
            solo_group['seconds'] = self.expt.runtime.get('seconds', 0)

            nml.write(nml_path, force=True)

        # Construct the land CPU mask
        if self.expt.config.get('mask_table', False):
            # NOTE: This function actually creates a mask table using the
            #       `check_mask` command line tool.  But it is not very usable
            #       since you need to know the number of masked CPUs to submit
            #       the job.  It needs a rethink of the submission process.
            self.create_mask_table(nml)

        # NOTE: Don't expect this to be here forever...
        # Attempt to set a mask table from the input
        if not self.config.get('mask', False):
            return

        active_mask = os.path.join(self.work_input_path, 'ocean_mask_table')

        # Remove any existing mask
        # (If no reference mask is available, then we will not use one)
        if os.path.isfile(active_mask):
            os.remove(active_mask)

        # Reference mask table, named after the number of masked CPUs and
        # the processor layout
        assert ('layout' in nml['ocean_model_nml'])
        nx, ny = nml['ocean_model_nml'].get('layout')
        masked = nx * ny - self.config.get('ncpus')

        ref_name = 'mask_table.{nmask}.{nx}x{ny}'.format(
            nmask=masked, nx=nx, ny=ny)
        ref_mask = os.path.join(self.work_input_path, ref_name)

        # Set (or replace) mask table if reference is available
        if os.path.isfile(ref_mask):
            make_symlink(ref_mask, active_mask)
Ejemplo n.º 12
0
    def setup(self):
        """
        Populate the model work directory before a run.

        Creates the input, restart and output work directories, copies the
        required and optional configuration files from the control path,
        stages restart files from the prior run (unless this is a repeat
        run), stages the input files and, if a ``timestep`` is configured,
        applies it through ``self.set_timestep``.
        """
        # Create experiment directory structure
        mkdir_p(self.work_input_path)
        mkdir_p(self.work_restart_path)
        mkdir_p(self.work_output_path)

        # Copy configuration files from control path
        for name in self.config_files:
            source = os.path.join(self.control_path, name)
            shutil.copy(source, self.work_path)

        for name in self.optional_config_files:
            source = os.path.join(self.control_path, name)
            try:
                shutil.copy(source, self.work_path)
            except IOError as copy_err:
                # Only a missing optional file may be ignored
                if copy_err.errno != errno.ENOENT:
                    raise

        # Link restart files from prior run
        if self.prior_restart_path and not self.expt.repeat_run:
            for name in self.get_prior_restart_files():
                prior_file = os.path.join(self.prior_restart_path, name)
                init_file = os.path.join(self.work_init_path, name)
                if self.copy_restarts:
                    shutil.copy(prior_file, init_file)
                    continue
                make_symlink(prior_file, init_file)

        # Link input data
        for directory in self.input_paths:
            for name in os.listdir(directory):
                original = os.path.join(directory, name)
                staged = os.path.join(self.work_input_path, name)

                # Do not use input file if it is in RESTART
                if os.path.exists(staged):
                    continue

                if self.copy_inputs:
                    shutil.copy(original, staged)
                else:
                    make_symlink(original, staged)

        configured_step = self.config.get('timestep')
        if configured_step:
            self.set_timestep(configured_step)
Ejemplo n.º 13
0
Archivo: cice.py Proyecto: nicjhan/payu
    def link_restart(self, fpath):
        """
        Symlink restart file ``fpath`` from an input path into the work path.

        Nothing is done when the file already exists under
        ``self.work_path``.  Otherwise the first directory of
        ``self.input_paths`` that contains ``fpath`` supplies the link
        target; an ``AssertionError`` is raised when none does.
        """
        input_work_path = os.path.join(self.work_path, fpath)

        # Only link when the restart file is not already in place
        if not os.path.isfile(input_work_path):
            candidates = (os.path.join(i_path, fpath)
                          for i_path in self.input_paths)
            # First candidate that is an existing file, or None
            input_path = next(
                (path for path in candidates if os.path.isfile(path)), None)
            assert input_path

            make_symlink(input_path, input_work_path)
Ejemplo n.º 14
0
Archivo: cice.py Proyecto: coecms/payu
    def link_restart(self, fpath):
        """
        Link ``fpath`` into the work path from the first input path holding it.

        Returns immediately if ``fpath`` is already a file under
        ``self.work_path``.  Raises ``AssertionError`` if no directory in
        ``self.input_paths`` contains the file.
        """
        destination = os.path.join(self.work_path, fpath)

        # Exit if the restart file already exists
        if os.path.isfile(destination):
            return

        # Search the input paths in order; the else branch runs only when
        # the loop finished without finding the file
        for search_dir in self.input_paths:
            found = os.path.join(search_dir, fpath)
            if os.path.isfile(found):
                break
        else:
            found = None
        assert found

        make_symlink(found, destination)
Ejemplo n.º 15
0
    def make_links(self):
        """
        Payu integration function which stages every manifest entry.

        Each file path yielded by iterating ``self`` is copied or symlinked
        from ``self.fullpath(filepath)``, depending on
        ``self.copy_file(filepath)``.  Entries whose original file is
        missing are afterwards reported, deleted from the manifest and
        flagged through ``self.needsync``.
        """
        missing = []
        for entry in self:
            if os.path.exists(self.fullpath(entry)):
                if self.copy_file(entry):
                    shutil.copy(self.fullpath(entry), entry)
                else:
                    make_symlink(self.fullpath(entry), entry)
            else:
                # It may have been deleted but still be in the manifest;
                # removal is deferred until iteration has finished
                missing.append(entry)

        for entry in missing:
            message = "File not found: {} removing from manifest"
            print(message.format(self.fullpath(entry)))
            self.delete(entry)
            self.needsync = True
Ejemplo n.º 16
0
    def make_links(self):
        """
        Payu integration function for populating work directories.

        Iterates over the file paths in ``self``: an entry whose original
        (``self.fullpath(filepath)``) exists is copied or symlinked
        according to ``self.copy_file(filepath)``.  The remaining entries
        are reported, deleted from the manifest and ``self.needsync`` is
        set.
        """
        stale_entries = []
        for path in self:
            original = self.fullpath(path)

            # Check file exists. It may have been deleted but still in manifest
            if not os.path.exists(original):
                stale_entries.append(path)
                continue

            stage_as_copy = self.copy_file(path)
            if stage_as_copy:
                shutil.copy(self.fullpath(path), path)
            else:
                make_symlink(self.fullpath(path), path)

        # Entries are dropped only after the loop, so the manifest is not
        # modified while it is being iterated
        for path in stale_entries:
            print("File not found: {} removing from manifest".format(
                self.fullpath(path)))
            self.delete(path)
            self.needsync = True
Ejemplo n.º 17
0
Archivo: um.py Proyecto: nicjhan/payu
    def setup(self):
        """
        Prepare the work directory, environment and namelists for a UM run.

        After the parent setup this stages the prior restart file, exports
        the variables defined in ``um_env.py`` (with ``{input_path}`` and
        ``{work_path}`` filled in), applies the same substitution to the
        ``parexe`` file in place, and updates the start date and run length
        in the ``namelists`` file.  For a continuation run the start date
        is derived from the prior run's namelist and ``cable.nml`` is
        updated as well.
        """
        super(UnifiedModel, self).setup()

        # Stage the UM restart file.
        if self.prior_restart_path and not self.expt.repeat_run:
            restart_src = os.path.join(self.prior_restart_path, self.restart)
            restart_dst = os.path.join(self.work_input_path, self.restart)

            if os.path.isfile(restart_src):
                make_symlink(restart_src, restart_dst)

        # Set up environment variables needed to run UM.
        # Look for a python file in the config directory.
        env_script = os.path.join(self.control_path, "um_env.py")
        um_vars = imp.load_source("um_env", env_script).vars

        assert len(self.input_paths) == 1

        # Values substituted for the {input_path} / {work_path} fields
        substitutions = {
            "input_path": self.input_paths[0],
            "work_path": self.work_path,
        }

        # Set paths in environment variables.
        for name in um_vars:
            um_vars[name] = um_vars[name].format(**substitutions)
        os.environ.update(um_vars)

        # The above needs to be done in parexe also.
        # FIXME: a better way to do this or remove.
        parexe = os.path.join(self.work_path, "parexe")
        for raw_line in fileinput.input(parexe, inplace=True):
            # With inplace=True, printed output replaces the file contents
            print(raw_line.format(**substitutions), end="")

        work_nml_path = os.path.join(self.work_path, "namelists")
        work_nml = f90nml.read(work_nml_path)

        # Modify namelists for a continuation run.
        if self.prior_output_path and not self.expt.repeat_run:

            prior_nml = f90nml.read(
                os.path.join(self.prior_output_path, "namelists"))

            # New start date = prior start date + prior run length
            init_date = um_date_to_date(
                prior_nml["NLSTCALL"]["MODEL_BASIS_TIME"])
            runtime = um_time_to_time(
                prior_nml["NLSTCALL"]["RUN_RESUBMIT_INC"])
            run_start_date = cal.date_plus_seconds(
                init_date, runtime, cal.GREGORIAN)

            # Write out and save new calendar information.
            start_um = date_to_um_date(run_start_date)
            work_nml["NLSTCALL"]["MODEL_BASIS_TIME"] = start_um
            work_nml["NLSTCALL"]["ANCIL_REFTIME"] = start_um

            # Tell CABLE that this is a continuation run.
            cable_nml_path = os.path.join(self.work_path, "cable.nml")
            cable_nml = f90nml.read(cable_nml_path)
            cable_nml["cable"]["cable_user"]["CABLE_RUNTIME_COUPLED"] = False
            cable_nml.write(cable_nml_path, force=True)

        else:
            run_start_date = um_date_to_date(
                work_nml["NLSTCALL"]["MODEL_BASIS_TIME"])

        # Set the runtime for this run.
        if self.expt.runtime:
            run_length = cal.runtime_from_date(
                run_start_date,
                self.expt.runtime["years"],
                self.expt.runtime["months"],
                self.expt.runtime["days"],
                self.expt.runtime.get("seconds", 0),
                cal.GREGORIAN,
            )
            run_length_um = time_to_um_time(run_length)
            work_nml["NLSTCALL"]["RUN_RESUBMIT_INC"] = run_length_um
            work_nml["NLSTCALL"]["RUN_TARGET_END"] = run_length_um
            work_nml["STSHCOMP"]["RUN_TARGET_END"] = run_length_um

        work_nml.write(work_nml_path, force=True)
Ejemplo n.º 18
0
Archivo: um.py Proyecto: HoWol76/payu
    def setup(self):
        """
        Prepare the work directory, environment and namelists for a UM run.

        After the parent setup this loads the variables from ``um_env.py``,
        stages the prior restart file (switching the run to an ``NRUN``
        that starts from it), exports the variables with ``{input_path}``
        and ``{work_path}`` filled in, applies the same substitution to the
        ``parexe`` file in place, and updates the start date and run length
        in the ``namelists`` file.  For a continuation run the start date
        is derived from the prior run's namelist and ``cable.nml`` is
        updated as well.
        """
        super(UnifiedModel, self).setup()

        # Set up environment variables needed to run UM.
        # Look for a python file in the config directory.
        env_module = imp.load_source(
            'um_env', os.path.join(self.control_path, 'um_env.py'))
        um_vars = env_module.vars

        # Stage the UM restart file.
        if self.prior_restart_path and not self.expt.repeat_run:
            prior_restart = os.path.join(self.prior_restart_path,
                                         self.restart)
            staged_restart = os.path.join(self.work_input_path, self.restart)

            if os.path.isfile(prior_restart):
                make_symlink(prior_restart, staged_restart)
                # every run is an NRUN with an updated ASTART file
                um_vars['ASTART'] = self.restart
                um_vars['TYPE'] = 'NRUN'

        # Set paths in environment variables.
        for var_name in um_vars:
            template = um_vars[var_name]
            um_vars[var_name] = template.format(
                input_path=self.input_paths[0], work_path=self.work_path)
        os.environ.update(um_vars)

        # The above needs to be done in parexe also.
        # FIXME: a better way to do this or remove.
        parexe = os.path.join(self.work_path, 'parexe')
        for text in fileinput.input(parexe, inplace=True):
            # With inplace=True, printed output replaces the file contents
            print(text.format(input_path=self.input_paths[0],
                              work_path=self.work_path),
                  end='')

        work_nml_path = os.path.join(self.work_path, 'namelists')
        work_nml = f90nml.read(work_nml_path)

        # Modify namelists for a continuation run.
        if self.prior_output_path and not self.expt.repeat_run:

            prior_nml = f90nml.read(
                os.path.join(self.prior_output_path, 'namelists'))

            # New start date = prior start date + prior run length
            init_date = um_date_to_date(
                prior_nml['NLSTCALL']['MODEL_BASIS_TIME'])
            runtime = um_time_to_time(
                prior_nml['NLSTCALL']['RUN_RESUBMIT_INC'])
            run_start_date = cal.date_plus_seconds(init_date, runtime,
                                                   cal.GREGORIAN)

            # Write out and save new calendar information.
            um_start = date_to_um_date(run_start_date)
            work_nml['NLSTCALL']['MODEL_BASIS_TIME'] = um_start
            work_nml['NLSTCALL']['ANCIL_REFTIME'] = um_start

            # Tell CABLE that this is a continuation run.
            cable_nml_path = os.path.join(self.work_path, 'cable.nml')
            cable_nml = f90nml.read(cable_nml_path)
            cable_nml['cable']['cable_user']['CABLE_RUNTIME_COUPLED'] = False
            cable_nml.write(cable_nml_path, force=True)

        else:
            run_start_date = um_date_to_date(
                work_nml['NLSTCALL']['MODEL_BASIS_TIME'])

        # Set the runtime for this run.
        if self.expt.runtime:
            length = cal.runtime_from_date(
                run_start_date,
                self.expt.runtime['years'],
                self.expt.runtime['months'],
                self.expt.runtime['days'],
                self.expt.runtime.get('seconds', 0),
                cal.GREGORIAN)
            um_length = time_to_um_time(length)
            work_nml['NLSTCALL']['RUN_RESUBMIT_INC'] = um_length
            work_nml['NLSTCALL']['RUN_TARGET_END'] = um_length
            work_nml['STSHCOMP']['RUN_TARGET_END'] = um_length

        work_nml.write(work_nml_path, force=True)
Ejemplo n.º 19
0
    def setup(self):
        """
        Prepare the work directory, environment and namelists for a UM run.

        After the parent setup this loads the variables from ``um_env.py``,
        stages the prior restart file (switching the run to an ``NRUN``
        that starts from it), exports the variables with ``{input_path}``
        and ``{work_path}`` filled in, applies the same substitution to the
        ``parexe`` file in place, and updates the start date and run length
        in the ``namelists`` file.  For a continuation run the start date
        is read from the restart calendar file of the prior run.
        """
        super(UnifiedModel, self).setup()

        # Set up environment variables needed to run UM.
        # Look for a python file in the config directory.
        um_env = imp.load_source('um_env',
                                 os.path.join(self.control_path, 'um_env.py'))
        um_vars = um_env.vars

        # Stage the UM restart file.
        if self.prior_restart_path and not self.expt.repeat_run:
            f_src = os.path.join(self.prior_restart_path, self.restart)
            f_dst = os.path.join(self.work_input_path, self.restart)

            if os.path.isfile(f_src):
                make_symlink(f_src, f_dst)
                # every run is an NRUN with an updated ASTART file
                um_vars['ASTART'] = self.restart
                um_vars['TYPE'] = 'NRUN'

        # Set paths in environment variables.
        for k in um_vars.keys():
            um_vars[k] = um_vars[k].format(input_path=self.input_paths[0],
                                           work_path=self.work_path)
        os.environ.update(um_vars)

        # The above needs to be done in parexe also.
        # FIXME: a better way to do this or remove.
        parexe = os.path.join(self.work_path, 'parexe')
        for line in fileinput.input(parexe, inplace=True):
            # With inplace=True, printed output replaces the file contents
            line = line.format(input_path=self.input_paths[0],
                               work_path=self.work_path)
            print(line, end='')

        work_nml_path = os.path.join(self.work_path, 'namelists')
        work_nml = f90nml.read(work_nml_path)

        restart_calendar_path = os.path.join(self.work_init_path,
                                             self.restart_calendar_file)

        # Modify namelists for a continuation run.
        if (self.prior_restart_path and not self.expt.repeat_run
                and os.path.exists(restart_calendar_path)):

            with open(restart_calendar_path, 'r') as restart_file:
                # safe_load instead of yaml.load: calling yaml.load without
                # a Loader is deprecated, is rejected by PyYAML 6, and can
                # construct arbitrary Python objects from the file.
                # NOTE(review): assumes the calendar file holds only plain
                # YAML (e.g. a timestamp for 'end_date') - confirm against
                # the code that writes it.
                restart_info = yaml.safe_load(restart_file)

            run_start_date = restart_info['end_date']

            # Write out and save new calendar information.
            run_start_date_um = date_to_um_date(run_start_date)
            work_nml['NLSTCALL']['MODEL_BASIS_TIME'] = run_start_date_um
            work_nml['NLSTCALL']['ANCIL_REFTIME'] = run_start_date_um

        else:
            run_start_date = work_nml['NLSTCALL']['MODEL_BASIS_TIME']
            run_start_date = um_date_to_date(run_start_date)

        # Set the runtime for this run.
        if self.expt.runtime:
            run_runtime = cal.runtime_from_date(
                run_start_date,
                self.expt.runtime['years'],
                self.expt.runtime['months'],
                self.expt.runtime['days'],
                self.expt.runtime.get('seconds', 0),
                cal.GREGORIAN)
            run_runtime = time_to_um_time(run_runtime)
            work_nml['NLSTCALL']['RUN_RESUBMIT_INC'] = run_runtime
            work_nml['NLSTCALL']['RUN_TARGET_END'] = run_runtime
            work_nml['STSHCOMP']['RUN_TARGET_END'] = run_runtime

        work_nml.write(work_nml_path, force=True)
Ejemplo n.º 20
0
    def run(self, *user_flags):
        """Launch the model executables under MPI and process the outcome.

        Exports the ``env`` config entries to ``os.environ``, assembles one
        MPI command covering every model that has an executable, runs it
        with stdout/stderr redirected to the experiment log files, and
        then tidies up: empty files in the work directory are removed,
        profilers are post-processed and the logs are moved into the work
        directory.

        Args:
            *user_flags: Extra flags appended to the MPI launcher flags.

        Raises:
            SystemExit: If the model exits with a non-zero return code.
                The log files are first copied to ``error_logs`` in the
                archive directory.
        """

        self.load_modules()

        f_out = open(self.stdout_fname, 'w')
        f_err = open(self.stderr_fname, 'w')

        # Set MPI environment variables
        env = self.config.get('env')

        # Explicitly check for `None`, in case of an empty `env:` entry
        if env is None:
            env = {}

        for var in env:

            if env[var] is None:
                env_value = ''
            else:
                env_value = str(env[var])

            os.environ[var] = env_value

        mpi_config = self.config.get('mpi', {})
        mpi_runcmd = mpi_config.get('runcmd', 'mpirun')

        if self.config.get('scalasca', False):
            mpi_runcmd = ' '.join(['scalasca -analyze', mpi_runcmd])

        # MPI runtime flags
        mpi_flags = mpi_config.get('flags', [])
        if not mpi_flags:
            mpi_flags = self.config.get('mpirun', [])
            # TODO: Legacy config removal warning

        if isinstance(mpi_flags, list):
            # Copy so that the flags appended below do not leak back into
            # the configuration dictionary.
            mpi_flags = list(mpi_flags)
        else:
            mpi_flags = [mpi_flags]

        # TODO: More uniform support needed here
        if self.config.get('scalasca', False):
            mpi_flags = ['\"{}\"'.format(f) for f in mpi_flags]

        # XXX: I think this may be broken
        if user_flags:
            mpi_flags.extend(list(user_flags))

        if self.debug:
            mpi_flags.append('--debug')

        # Stays None if no model provides an executable.
        mpi_module = None

        mpi_progs = []
        for model in self.models:

            # Skip models without executables (e.g. couplers)
            if not model.exec_path:
                continue

            mpi_config = self.config.get('mpi', {})
            mpi_module = mpi_config.get('module', None)

            # Update MPI library module (if not explicitly set)
            # TODO: Check for MPI library mismatch across multiple binaries
            if mpi_module is None:
                mpi_module = envmod.lib_update(model.exec_path, 'libmpi.so')

            model_prog = []

            # Our MPICH wrapper does not support a working directory flag
            if not mpi_module.startswith('mvapich'):
                model_prog.append('-wdir {}'.format(model.work_path))

            # Append any model-specific MPI flags
            model_flags = model.config.get('mpiflags', [])
            if not isinstance(model_flags, list):
                model_prog.append(model_flags)
            else:
                model_prog.extend(model_flags)

            model_ncpus = model.config.get('ncpus')
            if model_ncpus:
                model_prog.append('-np {}'.format(model_ncpus))

            model_npernode = model.config.get('npernode')
            # TODO: New Open MPI format?
            if model_npernode:
                if model_npernode % 2 == 0:
                    # Floor division keeps the count an integer; true
                    # division would render as e.g. "ppr:4.0:socket".
                    npernode_flag = ('-map-by ppr:{}:socket'
                                     ''.format(model_npernode // 2))
                else:
                    npernode_flag = ('-map-by ppr:{}:node'
                                     ''.format(model_npernode))

                if self.config.get('scalasca', False):
                    npernode_flag = '\"{}\"'.format(npernode_flag)
                model_prog.append(npernode_flag)

            if self.config.get('hpctoolkit', False):
                os.environ['HPCRUN_EVENT_LIST'] = 'WALLCLOCK@5000'
                model_prog.append('hpcrun')

            for prof in self.profilers:
                if prof.runscript:
                    # list.append() returns None, so its result must not
                    # be assigned back to model_prog.
                    model_prog.append(prof.runscript)

            model_prog.append(model.exec_prefix)
            model_prog.append(model.exec_path)

            mpi_progs.append(' '.join(model_prog))

        cmd = '{} {} {}'.format(mpi_runcmd, ' '.join(mpi_flags),
                                ' : '.join(mpi_progs))

        for prof in self.profilers:
            cmd = prof.wrapper(cmd)

        # Expand shell variables inside flags
        if self.expand_shell_vars:
            cmd = os.path.expandvars(cmd)

        print(cmd)

        # Our MVAPICH wrapper does not support working directories
        if mpi_module is not None and mpi_module.startswith('mvapich'):
            curdir = os.getcwd()
            os.chdir(self.work_path)
        else:
            curdir = None

        # NOTE: This may not be necessary, since env seems to be getting
        # correctly updated.  Need to look into this.
        if env:
            # TODO: Replace with mpirun -x flag inputs
            proc = sp.Popen(shlex.split(cmd),
                            stdout=f_out,
                            stderr=f_err,
                            env=os.environ.copy())
            proc.wait()
            rc = proc.returncode
        else:
            rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err)

        # Return to control directory
        if curdir:
            os.chdir(curdir)

        if self.runlog:
            self.runlog.commit()

        f_out.close()
        f_err.close()

        # Remove any empty output files (e.g. logs)
        for fname in os.listdir(self.work_path):
            fpath = os.path.join(self.work_path, fname)
            if os.path.getsize(fpath) == 0:
                os.remove(fpath)

        # Clean up any profiling output
        # TODO: Move after `rc` code check?
        for prof in self.profilers:
            prof.postprocess()

        # TODO: Need a model-specific cleanup method call here
        # NOTE: This does not appear to catch hanging jobs killed by PBS
        if rc != 0:
            # Backup logs for failed runs
            error_log_dir = os.path.join(self.archive_path, 'error_logs')
            mkdir_p(error_log_dir)

            # NOTE: This is PBS-specific
            job_id = os.environ.get('PBS_JOBID', '')

            for fname in (self.stdout_fname, self.stderr_fname):
                src = os.path.join(self.control_path, fname)

                # NOTE: This assumes standard .out/.err extensions
                dest = os.path.join(error_log_dir,
                                    fname[:-4] + '.' + job_id + fname[-4:])
                print(src, dest)

                shutil.copyfile(src, dest)

            # Create the symlink to the logs if it does not exist
            make_symlink(self.archive_path, self.archive_sym_path)

            # Terminate payu
            sys.exit('payu: Model exited with error code {}; aborting.'
                     ''.format(rc))

        # Decrement run counter on successful run
        stop_file_path = os.path.join(self.control_path, 'stop_run')
        if os.path.isfile(stop_file_path):
            assert os.stat(stop_file_path).st_size == 0
            os.remove(stop_file_path)
            print('payu: Stop file detected; terminating resubmission.')
            self.n_runs = 0
        else:
            self.n_runs -= 1

        # Move logs to archive (or delete if empty)
        for f in (self.stdout_fname, self.stderr_fname):
            f_path = os.path.join(self.control_path, f)
            if os.path.getsize(f_path) == 0:
                os.remove(f_path)
            else:
                shutil.move(f_path, self.work_path)

        run_script = self.userscripts.get('run')
        if run_script:
            self.run_userscript(run_script)
Ejemplo n.º 21
0
    def setup(self):
        """Configure ``input.nml`` and the ocean mask table for a MOM run.

        After the parent class setup this:

        * creates a ``log`` directory in the work path when this is not
          the top-level model;
        * runs the CORE2 IAF setup when ``core2iaf`` is set in the config;
        * writes the experiment runtime into ``ocean_solo_nml``;
        * optionally builds a mask table (``mask_table`` in the experiment
          config) or links a reference mask table from the work input
          directory (``mask`` in the model config).
        """
        # FMS initialisation
        super(Mom, self).setup()

        if not self.top_level_model:
            # Make log dir
            mkdir_p(os.path.join(self.work_path, 'log'))

        input_nml_path = os.path.join(self.work_path, 'input.nml')
        input_nml = f90nml.read(input_nml_path)

        if self.config.get('core2iaf'):
            self.core2iaf_setup()

        # Set the runtime
        runtime = self.expt.runtime
        if runtime:
            ocean_solo_nml = input_nml['ocean_solo_nml']

            for unit in ('years', 'months', 'days'):
                ocean_solo_nml[unit] = runtime[unit]
            # Seconds are optional in the runtime configuration.
            ocean_solo_nml['seconds'] = runtime.get('seconds', 0)

            input_nml.write(input_nml_path, force=True)

        # Construct the land CPU mask
        if self.expt.config.get('mask_table', False):
            # NOTE: This function actually creates a mask table using the
            #       `check_mask` command line tool.  But it is not very usable
            #       since you need to know the number of masked CPUs to submit
            #       the job.  It needs a rethink of the submission process.
            self.create_mask_table(input_nml)

        # NOTE: Don't expect this to be here forever...
        # Everything below attempts to set a mask table from the input.
        if not self.config.get('mask', False):
            return

        mask_path = os.path.join(self.work_input_path, 'ocean_mask_table')

        # Remove any existing mask
        # (If no reference mask is available, then we will not use one)
        if os.path.isfile(mask_path):
            os.remove(mask_path)

        # Reference mask table, named after the masked CPU count and layout
        ocean_model_nml = input_nml['ocean_model_nml']
        assert('layout' in ocean_model_nml)
        nx, ny = ocean_model_nml.get('layout')
        n_masked_cpus = nx * ny - self.config.get('ncpus')

        mask_table_fname = 'mask_table.{nmask}.{nx}x{ny}'.format(
            nmask=n_masked_cpus, nx=nx, ny=ny)
        ref_mask_path = os.path.join(self.work_input_path, mask_table_fname)

        # Set (or replace) mask table if reference is available
        if os.path.isfile(ref_mask_path):
            make_symlink(ref_mask_path, mask_path)
Ejemplo n.º 22
0
    def archive(self):
        """Move the work directory into the archive and prune restarts.

        Does nothing (beyond printing a notice) when ``archive`` is set to
        a false value in the config.  Otherwise each model is archived,
        the work directory is moved to the output path, outdated restart
        directories are deleted, and the optional collate, profile and
        user ``archive`` steps are run.

        Raises:
            SystemExit: If there is no work directory to archive, or if
                the output path already exists.
            subprocess.CalledProcessError: If the ``mv``, collate or
                profile command fails.
        """
        if not self.config.get('archive', True):
            print('payu: not archiving due to config.yaml setting.')
            return

        # Check there is a work directory, otherwise bail
        if not os.path.exists(self.work_sym_path):
            sys.exit('payu: error: No work directory to archive.')

        mkdir_p(self.archive_path)
        make_symlink(self.archive_path, self.archive_sym_path)

        # Remove work symlink
        if os.path.islink(self.work_sym_path):
            os.remove(self.work_sym_path)

        mkdir_p(self.restart_path)

        for model in self.models:
            model.archive()

        # Postprocess the model suite
        if len(self.models) > 1:
            self.model.archive()

        # Double-check that the run path does not exist
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        # Pass the arguments as a list so that paths containing whitespace
        # or quotes are not re-split.
        sp.check_call(['mv', self.work_path, self.output_path])

        # Remove old restart files
        # TODO: Move to subroutine
        restart_freq = self.config.get('restart_freq', default_restart_freq)
        restart_history = self.config.get('restart_history',
                                          default_restart_history)

        # Remove any outdated restart files
        prefix = 'restart'
        prior_restart_dirs = [d for d in os.listdir(self.archive_path)
                              if d.startswith(prefix)]

        for res_dir in prior_restart_dirs:

            # Slice off the literal prefix (str.lstrip strips a character
            # set, not a prefix) and skip entries without a numeric index.
            try:
                res_idx = int(res_dir[len(prefix):])
            except ValueError:
                continue

            if (self.repeat_run or
                    (not res_idx % restart_freq == 0 and
                     res_idx <= (self.counter - restart_history))):

                res_path = os.path.join(self.archive_path, res_dir)

                # Only delete real directories; ignore symbolic restart links
                # (isdir follows symlinks and rmtree refuses to remove one).
                if os.path.isdir(res_path) and not os.path.islink(res_path):
                    shutil.rmtree(res_path)

        collate_config = self.config.get('collate', {})
        if collate_config.get('enable', True):
            cmd = '{python} {payu} collate -i {expt}'.format(
                python=sys.executable,
                payu=self.payu_path,
                expt=self.counter
            )
            sp.check_call(shlex.split(cmd))

        if self.config.get('hpctoolkit', False):
            cmd = '{python} {payu} profile -i {expt}'.format(
                python=sys.executable,
                payu=self.payu_path,
                expt=self.counter
            )
            sp.check_call(shlex.split(cmd))

        archive_script = self.userscripts.get('archive')
        if archive_script:
            self.run_userscript(archive_script)
Ejemplo n.º 23
0
    def archive(self):
        """Move the work directory into the archive and prune restarts.

        Does nothing (beyond printing a notice) when ``archive`` is set to
        a false value in the config.  Otherwise each model is archived,
        the work directory is moved to the output path, outdated restart
        directories are deleted, and the optional collate, profile and
        user ``archive`` steps are run.

        Raises:
            SystemExit: If there is no work directory to archive, or if
                the output path already exists.
            subprocess.CalledProcessError: If the collate or profile
                command fails.
        """
        if not self.config.get('archive', True):
            print('payu: not archiving due to config.yaml setting.')
            return

        # Check there is a work directory, otherwise bail
        if not os.path.exists(self.work_sym_path):
            sys.exit('payu: error: No work directory to archive.')

        mkdir_p(self.archive_path)
        make_symlink(self.archive_path, self.archive_sym_path)

        # Remove work symlink
        if os.path.islink(self.work_sym_path):
            os.remove(self.work_sym_path)

        mkdir_p(self.restart_path)

        for model in self.models:
            model.archive()

        # Postprocess the model suite
        if len(self.models) > 1:
            self.model.archive()

        # Double-check that the run path does not exist
        if os.path.exists(self.output_path):
            sys.exit('payu: error: Output path already exists.')

        movetree(self.work_path, self.output_path)

        # Remove old restart files
        # TODO: Move to subroutine
        restart_freq = self.config.get('restart_freq', default_restart_freq)
        restart_history = self.config.get('restart_history',
                                          default_restart_history)

        # Remove any outdated restart files
        prefix = 'restart'
        prior_restart_dirs = [d for d in os.listdir(self.archive_path)
                              if d.startswith(prefix)]

        for res_dir in prior_restart_dirs:

            # Slice off the literal prefix (str.lstrip strips a character
            # set, not a prefix) and skip entries without a numeric index.
            try:
                res_idx = int(res_dir[len(prefix):])
            except ValueError:
                continue

            if (self.repeat_run or
                    (not res_idx % restart_freq == 0 and
                     res_idx <= (self.counter - restart_history))):

                res_path = os.path.join(self.archive_path, res_dir)

                # Only delete real directories; ignore symbolic restart links
                if (os.path.isdir(res_path) and not os.path.islink(res_path)):
                    shutil.rmtree(res_path)

        # Ensure dynamic library support for subsequent python calls.
        # LD_LIBRARY_PATH may be unset and LIBDIR may be None, so neither
        # is assumed to be present.
        ld_libpaths = os.environ.get('LD_LIBRARY_PATH', '')
        py_libpath = sysconfig.get_config_var('LIBDIR')
        if py_libpath and py_libpath not in ld_libpaths.split(':'):
            # Drop an empty existing value to avoid a trailing ':'.
            os.environ['LD_LIBRARY_PATH'] = ':'.join(
                p for p in (py_libpath, ld_libpaths) if p
            )

        collate_config = self.config.get('collate', {})
        if collate_config.get('enable', True):
            cmd = '{python} {payu} collate -i {expt}'.format(
                python=sys.executable,
                payu=self.payu_path,
                expt=self.counter
            )
            sp.check_call(shlex.split(cmd))

        if self.config.get('hpctoolkit', False):
            cmd = '{python} {payu} profile -i {expt}'.format(
                python=sys.executable,
                payu=self.payu_path,
                expt=self.counter
            )
            sp.check_call(shlex.split(cmd))

        archive_script = self.userscripts.get('archive')
        if archive_script:
            self.run_userscript(archive_script)
Ejemplo n.º 24
0
    def run(self, *user_flags):
        """Launch the model executables under MPI and process the outcome.

        Exports the ``env`` config entries to ``os.environ``, assembles one
        MPI command covering every model that has a local executable,
        dumps the environment and run manifest, runs the command with
        stdout/stderr redirected to the experiment log files, and writes
        the job information file.  Afterwards empty files in the work
        directory are removed, profilers are post-processed and the logs
        are moved into the work directory.

        Args:
            *user_flags: Extra flags appended to the MPI launcher flags.

        Raises:
            SystemExit: If the model exits with a non-zero return code.
                The output files are first copied to ``error_logs`` in the
                archive directory and the user ``error`` script is run.
        """

        # XXX: This was previously done in reversion
        envmod.setup()

        self.load_modules()

        f_out = open(self.stdout_fname, 'w')
        f_err = open(self.stderr_fname, 'w')

        # Set MPI environment variables
        env = self.config.get('env')

        # Explicitly check for `None`, in case of an empty `env:` entry
        if env is None:
            env = {}

        for var in env:

            if env[var] is None:
                env_value = ''
            else:
                env_value = str(env[var])

            os.environ[var] = env_value

        mpi_config = self.config.get('mpi', {})
        mpi_runcmd = mpi_config.get('runcmd', 'mpirun')

        if self.config.get('scalasca', False):
            mpi_runcmd = ' '.join(['scalasca -analyze', mpi_runcmd])

        # MPI runtime flags
        mpi_flags = mpi_config.get('flags', [])
        if not mpi_flags:
            mpi_flags = self.config.get('mpirun', [])
            # TODO: Legacy config removal warning

        if isinstance(mpi_flags, list):
            # Copy so that the flags appended below do not leak back into
            # the configuration dictionary.
            mpi_flags = list(mpi_flags)
        else:
            mpi_flags = [mpi_flags]

        # TODO: More uniform support needed here
        if self.config.get('scalasca', False):
            mpi_flags = ['\"{0}\"'.format(f) for f in mpi_flags]

        # XXX: I think this may be broken
        if user_flags:
            mpi_flags.extend(list(user_flags))

        if self.debug:
            mpi_flags.append('--debug')

        # Stays None if no model provides an executable.
        mpi_module = None

        mpi_progs = []
        for model in self.models:

            # Skip models without executables (e.g. couplers)
            if not model.exec_path_local:
                continue

            mpi_config = self.config.get('mpi', {})
            mpi_module = mpi_config.get('module', None)

            # Update MPI library module (if not explicitly set)
            # TODO: Check for MPI library mismatch across multiple binaries
            if mpi_module is None:
                mpi_module = envmod.lib_update(
                    model.exec_path_local,
                    'libmpi.so'
                )

            model_prog = []

            # Our MPICH wrapper does not support a working directory flag
            if not mpi_module.startswith('mvapich'):
                model_prog.append('-wdir {0}'.format(model.work_path))

            # Append any model-specific MPI flags
            model_flags = model.config.get('mpiflags', [])
            if not isinstance(model_flags, list):
                model_prog.append(model_flags)
            else:
                model_prog.extend(model_flags)

            model_ncpus = model.config.get('ncpus')
            if model_ncpus:
                model_prog.append('-np {0}'.format(model_ncpus))

            model_npernode = model.config.get('npernode')
            # TODO: New Open MPI format?
            if model_npernode:
                if model_npernode % 2 == 0:
                    # Floor division keeps the count an integer; true
                    # division would render as e.g. "ppr:4.0:socket".
                    npernode_flag = ('-map-by ppr:{0}:socket'
                                     ''.format(model_npernode // 2))
                else:
                    npernode_flag = ('-map-by ppr:{0}:node'
                                     ''.format(model_npernode))

                if self.config.get('scalasca', False):
                    npernode_flag = '\"{0}\"'.format(npernode_flag)
                model_prog.append(npernode_flag)

            if self.config.get('hpctoolkit', False):
                os.environ['HPCRUN_EVENT_LIST'] = 'WALLCLOCK@5000'
                model_prog.append('hpcrun')

            for prof in self.profilers:
                if prof.runscript:
                    # list.append() returns None, so its result must not
                    # be assigned back to model_prog.
                    model_prog.append(prof.runscript)

            model_prog.append(model.exec_prefix)

            # Use the full path to symlinked exec_name in work as some
            # older MPI libraries complained executable was not in PATH
            model_prog.append(os.path.join(model.work_path, model.exec_name))

            mpi_progs.append(' '.join(model_prog))

        cmd = '{runcmd} {flags} {exes}'.format(
            runcmd=mpi_runcmd,
            flags=' '.join(mpi_flags),
            exes=' : '.join(mpi_progs)
        )

        for prof in self.profilers:
            cmd = prof.wrapper(cmd)

        # Expand shell variables inside flags
        if self.expand_shell_vars:
            cmd = os.path.expandvars(cmd)

        # TODO: Consider making this default
        if self.config.get('coredump', False):
            enable_core_dump()

        # Our MVAPICH wrapper does not support working directories
        if mpi_module is not None and mpi_module.startswith('mvapich'):
            curdir = os.getcwd()
            os.chdir(self.work_path)
        else:
            curdir = None

        # Dump out environment
        with open(self.env_fname, 'w') as file:
            file.write(yaml.dump(dict(os.environ), default_flow_style=False))

        self.runlog.create_manifest()
        if self.runlog.enabled:
            self.runlog.commit()

        # NOTE: This may not be necessary, since env seems to be getting
        # correctly updated.  Need to look into this.
        print(cmd)
        if env:
            # TODO: Replace with mpirun -x flag inputs
            proc = sp.Popen(shlex.split(cmd), stdout=f_out, stderr=f_err,
                            env=os.environ.copy())
            proc.wait()
            rc = proc.returncode
        else:
            rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err)

        # Return to control directory
        if curdir:
            os.chdir(curdir)

        f_out.close()
        f_err.close()

        self.finish_time = datetime.datetime.now()

        info = get_job_info()

        if info is None:
            # Not being run under PBS, reverse engineer environment
            info = {
                'PAYU_PATH': os.path.dirname(self.payu_path)
            }

        # Add extra information to save to jobinfo
        info.update(
            {
                'PAYU_CONTROL_DIR': self.control_path,
                'PAYU_RUN_ID': self.run_id,
                'PAYU_CURRENT_RUN': self.counter,
                'PAYU_N_RUNS':  self.n_runs,
                'PAYU_JOB_STATUS': rc,
                'PAYU_START_TIME': self.start_time.isoformat(),
                'PAYU_FINISH_TIME': self.finish_time.isoformat(),
                'PAYU_WALLTIME': "{0} s".format(
                    (self.finish_time - self.start_time).total_seconds()
                ),
            }
        )

        # Dump job info
        with open(self.job_fname, 'w') as file:
            file.write(yaml.dump(info, default_flow_style=False))

        # Remove any empty output files (e.g. logs)
        for fname in os.listdir(self.work_path):
            fpath = os.path.join(self.work_path, fname)
            if os.path.getsize(fpath) == 0:
                os.remove(fpath)

        # Clean up any profiling output
        # TODO: Move after `rc` code check?
        for prof in self.profilers:
            prof.postprocess()

        # TODO: Need a model-specific cleanup method call here
        # NOTE: This does not appear to catch hanging jobs killed by PBS
        if rc != 0:
            # Backup logs for failed runs
            error_log_dir = os.path.join(self.archive_path, 'error_logs')
            mkdir_p(error_log_dir)

            # NOTE: This is PBS-specific
            job_id = get_job_id(short=False)

            # Fall back to a prefix of the run id when no job id is found
            if job_id == '':
                job_id = str(self.run_id)[:6]

            for fname in self.output_fnames:

                src = os.path.join(self.control_path, fname)

                # Insert the job id between the file stem and its suffix
                stem, suffix = os.path.splitext(fname)
                dest = os.path.join(error_log_dir,
                                    ".".join((stem, job_id)) + suffix)

                print(src, dest)

                shutil.copyfile(src, dest)

            # Create the symlink to the logs if it does not exist
            make_symlink(self.archive_path, self.archive_sym_path)

            error_script = self.userscripts.get('error')
            if error_script:
                self.run_userscript(error_script)

            # Terminate payu
            sys.exit('payu: Model exited with error code {0}; aborting.'
                     ''.format(rc))

        # Decrement run counter on successful run
        stop_file_path = os.path.join(self.control_path, 'stop_run')
        if os.path.isfile(stop_file_path):
            assert os.stat(stop_file_path).st_size == 0
            os.remove(stop_file_path)
            print('payu: Stop file detected; terminating resubmission.')
            self.n_runs = 0
        else:
            self.n_runs -= 1

        # Move logs to archive (or delete if empty)
        for f in self.output_fnames:
            f_path = os.path.join(self.control_path, f)
            if os.path.getsize(f_path) == 0:
                os.remove(f_path)
            else:
                shutil.move(f_path, self.work_path)

        run_script = self.userscripts.get('run')
        if run_script:
            self.run_userscript(run_script)
Ejemplo n.º 25
0
    def setup(self):
        if not self.top_level_model:
            return

        cpl_keys = {'cice': ('input_ice.nml', 'coupling', 'runtime0'),
                    'matm': ('input_atm.nml', 'coupling', 'truntime0')}

        # Keep track of this in order to set the oasis runtime.
        run_runtime = 0

        for model in self.expt.models:

            if model.model_type == 'cice' or model.model_type == 'cice5':

                # Horrible hack to make a link to o2i.nc in the
                # work/ice/RESTART directory
                f_name = 'o2i.nc'
                f_src = os.path.join(model.work_path, f_name)
                f_dst = os.path.join(model.work_restart_path, f_name)

                if os.path.isfile(f_src):
                    make_symlink(f_src, f_dst)

            if model.model_type == 'cice5':

                # Stage the supplemental input files
                if model.prior_restart_path:
                    for f_name in model.access_restarts:
                        f_src = os.path.join(model.prior_restart_path, f_name)
                        f_dst = os.path.join(model.work_input_path, f_name)

                        if os.path.isfile(f_src):
                            make_symlink(f_src, f_dst)

            if model.model_type in ('cice', 'matm'):

                # Update the supplemental OASIS namelists
                cpl_fname, cpl_group, runtime0_key = cpl_keys[model.model_type]

                cpl_fpath = os.path.join(model.work_path, cpl_fname)
                cpl_nml = f90nml.read(cpl_fpath)

                # Which calendar are we using, noleap or Gregorian.
                caltype = cpl_nml[cpl_group]['caltype']
                init_date = cal.int_to_date(cpl_nml[cpl_group]['init_date'])

                # Get time info about the beginning of this run. We're
                # interested in:
                #   1. start date of run
                #   2. total runtime of all previous runs.
                if model.prior_restart_path and not self.expt.repeat_run:

                    prior_cpl_fpath = os.path.join(model.prior_restart_path,
                                                   cpl_fname)

                    # With later versions this file exists in the prior restart
                    # path, but this was not always the case, so check, and if
                    # not there use prior output path
                    if not os.path.exists(prior_cpl_fpath):
                        print('payu: warning: {0} missing from prior restart '
                              'path; checking prior output.'.format(cpl_fname),
                              file=sys.stderr)
                        if not os.path.isdir(model.prior_output_path):
                            print('payu: error: No prior output path; '
                                  'aborting run.')
                            sys.exit(errno.ENOENT)

                        prior_cpl_fpath = os.path.join(model.prior_output_path,
                                                       cpl_fname)

                    try:
                        prior_cpl_nml = f90nml.read(prior_cpl_fpath)
                    except IOError as exc:
                        if exc.errno == errno.ENOENT:
                            print('payu: error: {0} does not exist; aborting.'
                                  ''.format(prior_cpl_fpath), file=sys.stderr)
                            sys.exit(exc.errno)
                        else:
                            raise

                    cpl_nml_grp = prior_cpl_nml[cpl_group]

                    # The total time in seconds since the beginning of
                    # the experiment.
                    total_runtime = int(cpl_nml_grp[runtime0_key] +
                                        cpl_nml_grp['runtime'])
                    run_start_date = cal.date_plus_seconds(init_date,
                                                           total_runtime,
                                                           caltype)

                else:
                    total_runtime = 0
                    run_start_date = init_date

                # Get new runtime for this run. We get this from either the
                # 'runtime' part of the payu config, or from the namelist
                if self.expt.runtime:
                    run_runtime = cal.runtime_from_date(
                        run_start_date,
                        self.expt.runtime['years'],
                        self.expt.runtime['months'],
                        self.expt.runtime['days'],
                        self.expt.runtime.get('seconds', 0),
                        caltype)
                else:
                    run_runtime = cpl_nml[cpl_group]['runtime']

                # Now write out new run start date and total runtime.
                cpl_nml[cpl_group]['inidate'] = cal.date_to_int(run_start_date)
                cpl_nml[cpl_group][runtime0_key] = total_runtime
                cpl_nml[cpl_group]['runtime'] = int(run_runtime)

                if model.model_type == 'cice':
                    if self.expt.counter and not self.expt.repeat_run:
                        cpl_nml[cpl_group]['jobnum'] = 1 + self.expt.counter
                    else:
                        cpl_nml[cpl_group]['jobnum'] = 1

                nml_work_path = os.path.join(model.work_path, cpl_fname)
                f90nml.write(cpl_nml, nml_work_path + '~')
                shutil.move(nml_work_path + '~', nml_work_path)

        # Now change the oasis runtime. This needs to be done after the others.
        for model in self.expt.models:
            if model.model_type == 'oasis':
                namcouple = os.path.join(model.work_path, 'namcouple')

                s = ''
                with open(namcouple, 'r+') as f:
                    s = f.read()
                    m = re.search(r"^[ \t]*\$RUNTIME.*?^[ \t]*(\d+)", s,
                                  re.MULTILINE | re.DOTALL)
                    assert(m is not None)
                    s = s[:m.start(1)] + str(run_runtime) + s[m.end(1):]

                with open(namcouple, 'w') as f:
                    f.write(s)
Ejemplo n.º 26
0
    def setup(self):
        """Prepare the coupling inputs of every model for the next run.

        Three things happen, in this order:

        1. For each ``cice`` model with a prior restart path, every file
           named in ``model.access_restarts`` that exists there is symlinked
           into the model's work input directory.
        2. For each ``cice`` and ``matm`` model, the coupling namelist in the
           model's work directory is rewritten with the start date of this
           run (``inidate``), the accumulated runtime of earlier runs and the
           runtime of this run.  CICE additionally gets its ``jobnum``.
        3. For each ``oasis`` model, the integer following ``$RUNTIME`` in
           ``namcouple`` is replaced by the runtime computed in step 2 (the
           value of the last ``cice``/``matm`` model processed, or 0 if there
           was none).

        Raises:
            ValueError: if ``namcouple`` contains no ``$RUNTIME`` entry
                followed by an integer value.
        """
        # model type -> (namelist file, namelist group, accumulated-runtime key)
        cpl_keys = {'cice': ('input_ice.nml', 'coupling_nml', 'runtime0'),
                    'matm': ('input_atm.nml', 'coupling', 'truntime0')}

        # Keep track of this in order to set the oasis runtime.
        run_runtime = 0

        for model in self.expt.models:

            if model.model_type == 'cice':

                # Stage the supplemental input files
                if model.prior_restart_path:
                    for f_name in model.access_restarts:
                        f_src = os.path.join(model.prior_restart_path, f_name)
                        f_dst = os.path.join(model.work_input_path, f_name)

                        if os.path.isfile(f_src):
                            make_symlink(f_src, f_dst)

            if model.model_type in ('cice', 'matm'):

                # Update the supplemental OASIS namelists
                cpl_fname, cpl_group, runtime0_key = cpl_keys[model.model_type]

                cpl_fpath = os.path.join(model.work_path, cpl_fname)
                cpl_nml = f90nml.read(cpl_fpath)

                # Which calendar are we using, noleap or Gregorian.
                caltype = cpl_nml[cpl_group]['caltype']
                init_date = cal.int_to_date(cpl_nml[cpl_group]['init_date'])

                # Get time info about the beginning of this run. We're
                # interested in:
                #   1. start date of run
                #   2. total runtime of all previous runs.
                if model.prior_output_path and not self.expt.repeat_run:

                    prior_cpl_fpath = os.path.join(model.prior_output_path,
                                                   cpl_fname)
                    prior_cpl_nml = f90nml.read(prior_cpl_fpath)
                    cpl_nml_grp = prior_cpl_nml[cpl_group]

                    # The total time in seconds since the beginning of
                    # the experiment.
                    total_runtime = int(cpl_nml_grp[runtime0_key] +
                                        cpl_nml_grp['runtime'])
                    run_start_date = cal.date_plus_seconds(init_date,
                                                           total_runtime,
                                                           caltype)

                else:
                    total_runtime = 0
                    run_start_date = init_date

                # Get new runtime for this run. We get this from either the
                # 'runtime' part of the payu config, or from the namelist
                if self.expt.runtime:
                    run_runtime = cal.runtime_from_date(
                        run_start_date,
                        self.expt.runtime['years'],
                        self.expt.runtime['months'],
                        self.expt.runtime['days'],
                        self.expt.runtime.get('seconds', 0),
                        caltype)
                else:
                    run_runtime = cpl_nml[cpl_group]['runtime']

                # Now write out new run start date and total runtime.
                cpl_nml[cpl_group]['inidate'] = cal.date_to_int(run_start_date)
                cpl_nml[cpl_group][runtime0_key] = total_runtime
                cpl_nml[cpl_group]['runtime'] = int(run_runtime)

                if model.model_type == 'cice':
                    if self.expt.counter and not self.expt.repeat_run:
                        cpl_nml[cpl_group]['jobnum'] = 1 + self.expt.counter
                    else:
                        cpl_nml[cpl_group]['jobnum'] = 1

                # Write to a scratch name first, then move it over the
                # namelist that was read above.
                f90nml.write(cpl_nml, cpl_fpath + '~')
                shutil.move(cpl_fpath + '~', cpl_fpath)

        # Now change the oasis runtime. This needs to be done after the others.
        for model in self.expt.models:
            if model.model_type == 'oasis':
                namcouple = os.path.join(model.work_path, 'namcouple')

                # The file is only read here; it is rewritten in full below.
                with open(namcouple, 'r') as f:
                    s = f.read()

                # Capture the first integer that starts a line after the
                # line holding the $RUNTIME keyword.
                m = re.search(r"^[ \t]*\$RUNTIME.*?^[ \t]*(\d+)", s,
                              re.MULTILINE | re.DOTALL)
                if m is None:
                    # An explicit error survives `python -O`, unlike assert.
                    raise ValueError(
                        'payu: error: no $RUNTIME value found in {0}'
                        ''.format(namcouple))

                # Use the same integer form that went into the namelists.
                s = s[:m.start(1)] + str(int(run_runtime)) + s[m.end(1):]

                with open(namcouple, 'w') as f:
                    f.write(s)
Ejemplo n.º 27
0
    def run(self, *user_flags):
        """Launch the model executables under MPI and handle the outcome.

        The method exports the configured ``env`` variables, assembles one
        MPMD launcher command (one ``:``-separated section per model that has
        a local executable), runs it with stdout/stderr redirected to the
        experiment log files, and writes the environment and job information
        to YAML files.

        On a non-zero return code the log files are copied into the
        ``error_logs`` directory of the archive and payu exits.  On success
        the run counter is decremented (or zeroed if a ``stop_run`` file is
        present), non-empty logs are moved to the work directory and the
        ``run`` userscript, if any, is executed.

        Args:
            *user_flags: Extra flags appended to the MPI launcher flags.

        Raises:
            SystemExit: if the MPI command returns a non-zero exit code.
        """
        # XXX: This was previously done in reversion
        envmod.setup()

        self.load_modules()

        f_out = open(self.stdout_fname, 'w')
        f_err = open(self.stderr_fname, 'w')

        # Set MPI environment variables
        env = self.config.get('env')

        # Explicitly check for `None`, in case of an empty `env:` entry
        if env is None:
            env = {}

        for var in env:

            if env[var] is None:
                env_value = ''
            else:
                env_value = str(env[var])

            os.environ[var] = env_value

        mpi_config = self.config.get('mpi', {})
        mpi_runcmd = mpi_config.get('runcmd', 'mpirun')

        if self.config.get('scalasca', False):
            mpi_runcmd = ' '.join(['scalasca -analyze', mpi_runcmd])

        # MPI runtime flags
        mpi_flags = mpi_config.get('flags', [])
        if not mpi_flags:
            mpi_flags = self.config.get('mpirun', [])
            # TODO: Legacy config removal warning

        if isinstance(mpi_flags, list):
            # Work on a copy: the appends below must not accumulate in the
            # list object stored in the configuration.
            mpi_flags = list(mpi_flags)
        else:
            mpi_flags = [mpi_flags]

        # TODO: More uniform support needed here
        if self.config.get('scalasca', False):
            mpi_flags = ['\"{0}\"'.format(f) for f in mpi_flags]

        # XXX: I think this may be broken
        if user_flags:
            mpi_flags.extend(list(user_flags))

        if self.debug:
            mpi_flags.append('--debug')

        # Explicitly configured MPI module, if any (loop-invariant).
        configured_mpi_module = mpi_config.get('module', None)

        # Stays None when no model has an executable, so the MVAPICH check
        # after the loop does not hit an unbound name.
        mpi_module = None

        mpi_progs = []
        for model in self.models:

            # Skip models without executables (e.g. couplers)
            if not model.exec_path_local:
                continue

            mpi_module = configured_mpi_module

            # Update MPI library module (if not explicitly set)
            # TODO: Check for MPI library mismatch across multiple binaries
            if mpi_module is None:
                mpi_module = envmod.lib_update(
                    model.exec_path_local,
                    'libmpi.so'
                )

            model_prog = []

            # Our MPICH wrapper does not support a working directory flag
            if not mpi_module.startswith('mvapich'):
                model_prog.append('-wdir {0}'.format(model.work_path))

            # Append any model-specific MPI flags
            model_flags = model.config.get('mpiflags', [])
            if not isinstance(model_flags, list):
                model_prog.append(model_flags)
            else:
                model_prog.extend(model_flags)

            model_ncpus = model.config.get('ncpus')
            if model_ncpus:
                model_prog.append('-np {0}'.format(model_ncpus))

            model_npernode = model.config.get('npernode')
            # TODO: New Open MPI format?
            if model_npernode:
                if model_npernode % 2 == 0:
                    # Floor division keeps the count an integer on Python 3
                    # (true division would render e.g. "ppr:2.0:socket").
                    npernode_flag = ('-map-by ppr:{0}:socket'
                                     ''.format(model_npernode // 2))
                else:
                    npernode_flag = ('-map-by ppr:{0}:node'
                                     ''.format(model_npernode))

                if self.config.get('scalasca', False):
                    npernode_flag = '\"{0}\"'.format(npernode_flag)
                model_prog.append(npernode_flag)

            if self.config.get('hpctoolkit', False):
                os.environ['HPCRUN_EVENT_LIST'] = 'WALLCLOCK@5000'
                model_prog.append('hpcrun')

            for prof in self.profilers:
                if prof.runscript:
                    # list.append returns None; never rebind its result.
                    model_prog.append(prof.runscript)

            model_prog.append(model.exec_prefix)

            # Use the full path to symlinked exec_name in work as some
            # older MPI libraries complained executable was not in PATH
            model_prog.append(os.path.join(model.work_path, model.exec_name))

            mpi_progs.append(' '.join(model_prog))

        cmd = '{runcmd} {flags} {exes}'.format(
            runcmd=mpi_runcmd,
            flags=' '.join(mpi_flags),
            exes=' : '.join(mpi_progs)
        )

        for prof in self.profilers:
            cmd = prof.wrapper(cmd)

        # Expand shell variables inside flags
        if self.expand_shell_vars:
            cmd = os.path.expandvars(cmd)

        # TODO: Consider making this default
        if self.config.get('coredump', False):
            enable_core_dump()

        # Our MVAPICH wrapper does not support working directories
        if mpi_module and mpi_module.startswith('mvapich'):
            curdir = os.getcwd()
            os.chdir(self.work_path)
        else:
            curdir = None

        try:
            # Dump out environment
            with open(self.env_fname, 'w') as env_file:
                env_file.write(
                    yaml.dump(dict(os.environ), default_flow_style=False)
                )

            self.runlog.create_manifest()
            if self.runlog.enabled:
                self.runlog.commit()

            # NOTE: This may not be necessary, since env seems to be getting
            # correctly updated.  Need to look into this.
            print(cmd)
            if env:
                # TODO: Replace with mpirun -x flag inputs
                proc = sp.Popen(shlex.split(cmd), stdout=f_out, stderr=f_err,
                                env=os.environ.copy())
                proc.wait()
                rc = proc.returncode
            else:
                rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err)
        finally:
            # Restore the control directory and release the log handles even
            # if the launch itself raised (e.g. launcher not found).
            if curdir:
                os.chdir(curdir)

            f_out.close()
            f_err.close()

        self.finish_time = datetime.datetime.now()

        info = get_job_info()

        if info is None:
            # Not being run under PBS, reverse engineer environment
            info = {
                'PAYU_PATH': os.path.dirname(self.payu_path)
            }

        # Add extra information to save to jobinfo
        info.update(
            {
                'PAYU_CONTROL_DIR': self.control_path,
                'PAYU_RUN_ID': self.run_id,
                'PAYU_CURRENT_RUN': self.counter,
                'PAYU_N_RUNS': self.n_runs,
                'PAYU_JOB_STATUS': rc,
                'PAYU_START_TIME': self.start_time.isoformat(),
                'PAYU_FINISH_TIME': self.finish_time.isoformat(),
                'PAYU_WALLTIME': "{0} s".format(
                    (self.finish_time - self.start_time).total_seconds()
                ),
            }
        )

        # Dump job info
        with open(self.job_fname, 'w') as job_file:
            job_file.write(yaml.dump(info, default_flow_style=False))

        # Remove any empty output files (e.g. logs)
        for fname in os.listdir(self.work_path):
            fpath = os.path.join(self.work_path, fname)
            if os.path.getsize(fpath) == 0:
                os.remove(fpath)

        # Clean up any profiling output
        # TODO: Move after `rc` code check?
        for prof in self.profilers:
            prof.postprocess()

        # TODO: Need a model-specific cleanup method call here
        # NOTE: This does not appear to catch hanging jobs killed by PBS
        if rc != 0:
            # Backup logs for failed runs
            error_log_dir = os.path.join(self.archive_path, 'error_logs')
            mkdir_p(error_log_dir)

            # NOTE: This is PBS-specific
            job_id = get_job_id(short=False)

            if job_id == '':
                job_id = self.run_id[:6]

            for fname in self.output_fnames:

                src = os.path.join(self.control_path, fname)

                # Insert the job id between the file stem and its extension
                stem, suffix = os.path.splitext(fname)
                dest = os.path.join(error_log_dir,
                                    ".".join((stem, job_id)) + suffix)

                print(src, dest)

                shutil.copyfile(src, dest)

            # Create the symlink to the logs if it does not exist
            make_symlink(self.archive_path, self.archive_sym_path)

            # Terminate payu
            sys.exit('payu: Model exited with error code {0}; aborting.'
                     ''.format(rc))

        # Decrement run counter on successful run
        stop_file_path = os.path.join(self.control_path, 'stop_run')
        if os.path.isfile(stop_file_path):
            # Sanity check: only an empty stop file is expected here
            assert os.stat(stop_file_path).st_size == 0
            os.remove(stop_file_path)
            print('payu: Stop file detected; terminating resubmission.')
            self.n_runs = 0
        else:
            self.n_runs -= 1

        # Move logs to archive (or delete if empty)
        for f in self.output_fnames:
            f_path = os.path.join(self.control_path, f)
            if os.path.getsize(f_path) == 0:
                os.remove(f_path)
            else:
                shutil.move(f_path, self.work_path)

        run_script = self.userscripts.get('run')
        if run_script:
            self.run_userscript(run_script)
Ejemplo n.º 28
0
    def run(self, *user_flags):
        """Launch the model executables under MPI and handle the outcome.

        The method exports the configured ``env`` variables, assembles one
        MPMD launcher command (one ``:``-separated section per model that has
        an executable), and runs it with stdout/stderr redirected to the
        experiment log files.  The run log is committed afterwards if one is
        set.

        On a non-zero return code the stdout/stderr logs are copied into the
        ``error_logs`` directory of the archive and payu exits.  On success
        the run counter is decremented (or zeroed if a ``stop_run`` file is
        present), non-empty logs are moved to the work directory and the
        ``run`` userscript, if any, is executed.

        Args:
            *user_flags: Extra flags appended to the MPI launcher flags.

        Raises:
            SystemExit: if the MPI command returns a non-zero exit code.
        """
        # XXX: This was previously done in reversion
        envmod.setup()

        self.load_modules()

        f_out = open(self.stdout_fname, 'w')
        f_err = open(self.stderr_fname, 'w')

        # Set MPI environment variables
        env = self.config.get('env')

        # Explicitly check for `None`, in case of an empty `env:` entry
        if env is None:
            env = {}

        for var in env:

            if env[var] is None:
                env_value = ''
            else:
                env_value = str(env[var])

            os.environ[var] = env_value

        mpi_config = self.config.get('mpi', {})
        mpi_runcmd = mpi_config.get('runcmd', 'mpirun')

        if self.config.get('scalasca', False):
            mpi_runcmd = ' '.join(['scalasca -analyze', mpi_runcmd])

        # MPI runtime flags
        mpi_flags = mpi_config.get('flags', [])
        if not mpi_flags:
            mpi_flags = self.config.get('mpirun', [])
            # TODO: Legacy config removal warning

        if isinstance(mpi_flags, list):
            # Work on a copy: the appends below must not accumulate in the
            # list object stored in the configuration.
            mpi_flags = list(mpi_flags)
        else:
            mpi_flags = [mpi_flags]

        # TODO: More uniform support needed here
        if self.config.get('scalasca', False):
            mpi_flags = ['\"{0}\"'.format(f) for f in mpi_flags]

        # XXX: I think this may be broken
        if user_flags:
            mpi_flags.extend(list(user_flags))

        if self.debug:
            mpi_flags.append('--debug')

        # Explicitly configured MPI module, if any (loop-invariant).
        configured_mpi_module = mpi_config.get('module', None)

        # Stays None when no model has an executable, so the MVAPICH check
        # after the loop does not hit an unbound name.
        mpi_module = None

        mpi_progs = []
        for model in self.models:

            # Skip models without executables (e.g. couplers)
            if not model.exec_path:
                continue

            mpi_module = configured_mpi_module

            # Update MPI library module (if not explicitly set)
            # TODO: Check for MPI library mismatch across multiple binaries
            if mpi_module is None:
                mpi_module = envmod.lib_update(model.exec_path, 'libmpi.so')

            model_prog = []

            # Our MPICH wrapper does not support a working directory flag
            if not mpi_module.startswith('mvapich'):
                model_prog.append('-wdir {0}'.format(model.work_path))

            # Append any model-specific MPI flags
            model_flags = model.config.get('mpiflags', [])
            if not isinstance(model_flags, list):
                model_prog.append(model_flags)
            else:
                model_prog.extend(model_flags)

            model_ncpus = model.config.get('ncpus')
            if model_ncpus:
                model_prog.append('-np {0}'.format(model_ncpus))

            model_npernode = model.config.get('npernode')
            # TODO: New Open MPI format?
            if model_npernode:
                if model_npernode % 2 == 0:
                    # Floor division keeps the count an integer on Python 3
                    # (true division would render e.g. "ppr:2.0:socket").
                    npernode_flag = ('-map-by ppr:{0}:socket'
                                     ''.format(model_npernode // 2))
                else:
                    npernode_flag = ('-map-by ppr:{0}:node'
                                     ''.format(model_npernode))

                if self.config.get('scalasca', False):
                    npernode_flag = '\"{0}\"'.format(npernode_flag)
                model_prog.append(npernode_flag)

            if self.config.get('hpctoolkit', False):
                os.environ['HPCRUN_EVENT_LIST'] = 'WALLCLOCK@5000'
                model_prog.append('hpcrun')

            for prof in self.profilers:
                if prof.runscript:
                    # list.append returns None; never rebind its result.
                    model_prog.append(prof.runscript)

            model_prog.append(model.exec_prefix)
            model_prog.append(model.exec_path)

            mpi_progs.append(' '.join(model_prog))

        cmd = '{runcmd} {flags} {exes}'.format(
            runcmd=mpi_runcmd,
            flags=' '.join(mpi_flags),
            exes=' : '.join(mpi_progs)
        )

        for prof in self.profilers:
            cmd = prof.wrapper(cmd)

        # Expand shell variables inside flags
        if self.expand_shell_vars:
            cmd = os.path.expandvars(cmd)

        print(cmd)

        # Our MVAPICH wrapper does not support working directories
        if mpi_module and mpi_module.startswith('mvapich'):
            curdir = os.getcwd()
            os.chdir(self.work_path)
        else:
            curdir = None

        try:
            # NOTE: This may not be necessary, since env seems to be getting
            # correctly updated.  Need to look into this.
            if env:
                # TODO: Replace with mpirun -x flag inputs
                proc = sp.Popen(shlex.split(cmd), stdout=f_out, stderr=f_err,
                                env=os.environ.copy())
                proc.wait()
                rc = proc.returncode
            else:
                rc = sp.call(shlex.split(cmd), stdout=f_out, stderr=f_err)
        finally:
            # Restore the control directory and release the log handles even
            # if the launch itself raised (e.g. launcher not found).
            if curdir:
                os.chdir(curdir)

            f_out.close()
            f_err.close()

        if self.runlog:
            self.runlog.commit()

        # Remove any empty output files (e.g. logs)
        for fname in os.listdir(self.work_path):
            fpath = os.path.join(self.work_path, fname)
            if os.path.getsize(fpath) == 0:
                os.remove(fpath)

        # Clean up any profiling output
        # TODO: Move after `rc` code check?
        for prof in self.profilers:
            prof.postprocess()

        # TODO: Need a model-specific cleanup method call here
        # NOTE: This does not appear to catch hanging jobs killed by PBS
        if rc != 0:
            # Backup logs for failed runs
            error_log_dir = os.path.join(self.archive_path, 'error_logs')
            mkdir_p(error_log_dir)

            # NOTE: This is PBS-specific
            job_id = os.environ.get('PBS_JOBID', '')

            for fname in (self.stdout_fname, self.stderr_fname):
                src = os.path.join(self.control_path, fname)

                # Insert the job id between the file stem and its extension.
                # For the usual .out/.err names this matches the old
                # fixed-width slicing, without assuming the extension length.
                stem, suffix = os.path.splitext(fname)
                dest = os.path.join(error_log_dir,
                                    stem + '.' + job_id + suffix)
                print(src, dest)

                shutil.copyfile(src, dest)

            # Create the symlink to the logs if it does not exist
            make_symlink(self.archive_path, self.archive_sym_path)

            # Terminate payu
            sys.exit('payu: Model exited with error code {0}; aborting.'
                     ''.format(rc))

        # Decrement run counter on successful run
        stop_file_path = os.path.join(self.control_path, 'stop_run')
        if os.path.isfile(stop_file_path):
            # Sanity check: only an empty stop file is expected here
            assert os.stat(stop_file_path).st_size == 0
            os.remove(stop_file_path)
            print('payu: Stop file detected; terminating resubmission.')
            self.n_runs = 0
        else:
            self.n_runs -= 1

        # Move logs to archive (or delete if empty)
        for f in (self.stdout_fname, self.stderr_fname):
            f_path = os.path.join(self.control_path, f)
            if os.path.getsize(f_path) == 0:
                os.remove(f_path)
            else:
                shutil.move(f_path, self.work_path)

        run_script = self.userscripts.get('run')
        if run_script:
            self.run_userscript(run_script)
Ejemplo n.º 29
0
    def setup(self):
        """Prepare the coupling inputs of every model for the next run.

        Three things happen, in this order:

        1. For each ``cice`` model with a prior restart path, every file
           named in ``model.access_restarts`` that exists there is symlinked
           into the model's work input directory.
        2. For each ``cice`` and ``matm`` model, the coupling namelist in the
           model's work directory is rewritten with the start date of this
           run (``inidate``), the accumulated runtime of earlier runs and the
           runtime of this run.  CICE additionally gets its ``jobnum``.
        3. For each ``oasis`` model, the integer following ``$RUNTIME`` in
           ``namcouple`` is replaced by the runtime computed in step 2 (the
           value of the last ``cice``/``matm`` model processed, or 0 if there
           was none).

        Raises:
            ValueError: if ``namcouple`` contains no ``$RUNTIME`` entry
                followed by an integer value.
        """
        # model type -> (namelist file, namelist group, accumulated-runtime key)
        cpl_keys = {
            'cice': ('input_ice.nml', 'coupling_nml', 'runtime0'),
            'matm': ('input_atm.nml', 'coupling', 'truntime0')
        }

        # Keep track of this in order to set the oasis runtime.
        run_runtime = 0

        for model in self.expt.models:

            if model.model_type == 'cice':

                # Stage the supplemental input files
                if model.prior_restart_path:
                    for f_name in model.access_restarts:
                        f_src = os.path.join(model.prior_restart_path, f_name)
                        f_dst = os.path.join(model.work_input_path, f_name)

                        if os.path.isfile(f_src):
                            make_symlink(f_src, f_dst)

            if model.model_type in ('cice', 'matm'):

                # Update the supplemental OASIS namelists
                cpl_fname, cpl_group, runtime0_key = cpl_keys[model.model_type]

                cpl_fpath = os.path.join(model.work_path, cpl_fname)
                cpl_nml = f90nml.read(cpl_fpath)

                # Which calendar are we using, noleap or Gregorian.
                caltype = cpl_nml[cpl_group]['caltype']
                init_date = cal.int_to_date(cpl_nml[cpl_group]['init_date'])

                # Get time info about the beginning of this run. We're
                # interested in:
                #   1. start date of run
                #   2. total runtime of all previous runs.
                if model.prior_output_path and not self.expt.repeat_run:

                    prior_cpl_fpath = os.path.join(model.prior_output_path,
                                                   cpl_fname)
                    prior_cpl_nml = f90nml.read(prior_cpl_fpath)
                    cpl_nml_grp = prior_cpl_nml[cpl_group]

                    # The total time in seconds since the beginning of
                    # the experiment.
                    total_runtime = int(cpl_nml_grp[runtime0_key] +
                                        cpl_nml_grp['runtime'])
                    run_start_date = cal.date_plus_seconds(
                        init_date, total_runtime, caltype)

                else:
                    total_runtime = 0
                    run_start_date = init_date

                # Get new runtime for this run. We get this from either the
                # 'runtime' part of the payu config, or from the namelist
                if self.expt.runtime:
                    run_runtime = cal.runtime_from_date(
                        run_start_date, self.expt.runtime['years'],
                        self.expt.runtime['months'], self.expt.runtime['days'],
                        self.expt.runtime.get('seconds', 0), caltype)
                else:
                    run_runtime = cpl_nml[cpl_group]['runtime']

                # Now write out new run start date and total runtime.
                cpl_nml[cpl_group]['inidate'] = cal.date_to_int(run_start_date)
                cpl_nml[cpl_group][runtime0_key] = total_runtime
                cpl_nml[cpl_group]['runtime'] = int(run_runtime)

                if model.model_type == 'cice':
                    if self.expt.counter and not self.expt.repeat_run:
                        cpl_nml[cpl_group]['jobnum'] = 1 + self.expt.counter
                    else:
                        cpl_nml[cpl_group]['jobnum'] = 1

                # Write to a scratch name first, then move it over the
                # namelist that was read above.
                f90nml.write(cpl_nml, cpl_fpath + '~')
                shutil.move(cpl_fpath + '~', cpl_fpath)

        # Now change the oasis runtime. This needs to be done after the others.
        for model in self.expt.models:
            if model.model_type == 'oasis':
                namcouple = os.path.join(model.work_path, 'namcouple')

                # The file is only read here; it is rewritten in full below.
                with open(namcouple, 'r') as f:
                    s = f.read()

                # Capture the first integer that starts a line after the
                # line holding the $RUNTIME keyword.
                m = re.search(r"^[ \t]*\$RUNTIME.*?^[ \t]*(\d+)", s,
                              re.MULTILINE | re.DOTALL)
                if m is None:
                    # An explicit error survives `python -O`, unlike assert.
                    raise ValueError(
                        'payu: error: no $RUNTIME value found in {0}'
                        ''.format(namcouple))

                # Use the same integer form that went into the namelists.
                s = s[:m.start(1)] + str(int(run_runtime)) + s[m.end(1):]

                with open(namcouple, 'w') as f:
                    f.write(s)