def test_create_checkpoint_without_netcdf(self, tmp_path): """Test that a checkpoint can be created when there are no outputs """ # define a yaml with NO outputs, but checkpoint p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True}) _delta = DeltaModel(input_file=p) # replace eta with a random field for checkpointing success check _rand_field = np.random.uniform(0, 1, size=_delta.eta.shape) _delta.eta = _rand_field _delta._save_time_since_checkpoint = float("inf") _delta.output_checkpoint() # force another checkpoint _delta.finalize() # should be no file assert not os.path.isfile(os.path.join( _delta.prefix, 'pyDeltaRCM_output.nc')) # can be resumed p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True, 'resume_checkpoint': True}) _delta = DeltaModel(input_file=p) # check that fields match assert np.all(_delta.eta == _rand_field)
def test_load_checkpoint_with_netcdf(self, tmp_path): """Test that a run can be resumed when there are outputs. """ # define a yaml with outputs (defaults will output strata) p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True, 'save_eta_grids': True}) _delta = DeltaModel(input_file=p) # replace eta with a random field for checkpointing success check _rand_field = np.random.uniform(0, 1, size=_delta.eta.shape) _delta.eta = _rand_field _delta._save_time_since_checkpoint = float("inf") _delta.output_checkpoint() # force another checkpoint _delta.finalize() # paths exists assert os.path.isfile(os.path.join( _delta.prefix, 'pyDeltaRCM_output.nc')) assert os.path.isfile(os.path.join( _delta.prefix, 'checkpoint.npz')) _delta = [] # clear # can be resumed p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True, 'resume_checkpoint': True}) _delta = DeltaModel(input_file=p) # check that fields match assert np.all(_delta.eta == _rand_field)
def test_multi_checkpoints(self, tmp_path): """Test using checkpoints multiple times for a given model run.""" # define a yaml for the base model run file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'save_eta_grids', True) utilities.write_parameter_to_file(base_f, 'save_checkpoint', True) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') base_f.close() baseModel = DeltaModel(input_file=base_p) # run base for 2 timesteps for _ in range(0, 50): baseModel.update() baseModel.finalize() # try defining a new model but plan to load checkpoint from baseModel file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'save_eta_grids', True) utilities.write_parameter_to_file(base_f, 'save_checkpoint', True) utilities.write_parameter_to_file(base_f, 'resume_checkpoint', True) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') base_f.close() resumeModel = DeltaModel(input_file=base_p) assert resumeModel.time <= baseModel.time # advance it more steps for _ in range(0, 25): resumeModel.update() resumeModel.finalize() # create another resume model resumeModel02 = DeltaModel(input_file=base_p) assert resumeModel02.time <= resumeModel.time # should be same # step it some more nt_resume02 = 0 while (resumeModel02._save_time_since_data != 0) or (nt_resume02 < 50): resumeModel02.update() nt_resume02 += 1 # assert that output netCDF4 exists exp_path_nc = os.path.join(tmp_path / 'test', 'pyDeltaRCM_output.nc') assert os.path.isfile(exp_path_nc) # load it into memory and check values in the netCDF4 output = Dataset(exp_path_nc, 'r', allow_pickle=True) out_vars = output.variables.keys() # check that expected variables are in the file assert 'x' in out_vars assert 'y' in out_vars assert 'time' in out_vars assert 'eta' in out_vars # check attributes of variables assert output['time'][0].tolist() == 0.0 assert output['time'][-1].tolist() == resumeModel02.time
def test_load_checkpoint_with_open_netcdf_win(self, tmp_path): """Test what happens if output netCDF file is actually open. This is not the same as when the netCDF file is open in another process. That situation raises an error for all OS. """ # define a yaml with outputs (defaults will output strata) p = utilities.yaml_from_dict(tmp_path, 'input.yaml', { 'save_checkpoint': True, 'save_eta_grids': True }) _delta = DeltaModel(input_file=p) # replace eta with a random field for checkpointing success check _rand_field = np.random.uniform(0, 1, size=_delta.eta.shape) _delta.eta = _rand_field _delta._save_time_since_checkpoint = float("inf") _delta.output_checkpoint() # force another checkpoint _delta.finalize() # paths exists assert os.path.isfile( os.path.join(_delta.prefix, 'pyDeltaRCM_output.nc')) assert os.path.isfile(os.path.join(_delta.prefix, 'checkpoint.npz')) _prefix = _delta.prefix _delta = [] # clear # open the netCDF file _opened = Dataset(os.path.join(_prefix, 'pyDeltaRCM_output.nc'), 'r+', format='NETCDF4') assert type(_opened) == Dataset assert 'eta' in _opened.variables.keys() # saved grid is the initial one, before random field was assigned assert np.all(_opened.variables['eta'][:].data != _rand_field) # can be resumed p = utilities.yaml_from_dict( tmp_path, 'input.yaml', { 'save_checkpoint': True, 'resume_checkpoint': True, 'save_eta_grids': True }) # raises a permissions error on Windows with pytest.raises(PermissionError): _ = DeltaModel(input_file=p)
def test_load_checkpoint_without_netcdf(self, tmp_path): """Test that a run can be resumed when there are outputs. """ # define a yaml with NO outputs, but checkpoint p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True}) _delta = DeltaModel(input_file=p) # replace eta with a random field for checkpointing success check _rand_field = np.random.uniform(0, 1, size=_delta.eta.shape) _delta.eta = _rand_field _delta._save_time_since_checkpoint = float("inf") _delta.output_checkpoint() # force another checkpoint _delta.finalize() # should be no nc file but should be a checkpoint file assert not os.path.isfile(os.path.join( _delta.prefix, 'pyDeltaRCM_output.nc')) assert os.path.isfile(os.path.join( _delta.prefix, 'checkpoint.npz')) # now try to resume, will WARN on not finding netcdf p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True, 'save_eta_grids': True, 'resume_checkpoint': True}) with pytest.warns(UserWarning, match=r'NetCDF4 output *.'): _delta = DeltaModel(input_file=p) # assert that a new output file exists file exists assert os.path.isfile(os.path.join( _delta.prefix, 'pyDeltaRCM_output.nc')) assert os.path.isfile(os.path.join( _delta.prefix, 'checkpoint.npz')) # check that fields match assert np.all(_delta.eta == _rand_field)
def test_checkpoint_nc(self, tmp_path): """Test the netCDF that is written to by the checkpointing.""" # define a yaml for the base model run file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') utilities.write_parameter_to_file(base_f, 'save_eta_grids', True) utilities.write_parameter_to_file(base_f, 'save_depth_grids', True) utilities.write_parameter_to_file(base_f, 'save_discharge_grids', True) utilities.write_parameter_to_file(base_f, 'save_sandfrac_grids', True) utilities.write_parameter_to_file(base_f, 'save_checkpoint', True) base_f.close() baseModel = DeltaModel(input_file=base_p) # run for some base number of steps nt_base = 50 for _ in range(0, 50): baseModel.update() # force the model run to end immmediately after exporting a checkpoint nt_var = 0 while (baseModel._save_time_since_checkpoint != 0): baseModel.update() nt_var += 1 # then finalize baseModel.finalize() # check that the time makes sense assert baseModel.time == baseModel._dt * (nt_base + nt_var) # try defining a new model but plan to load checkpoint from baseModel file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') utilities.write_parameter_to_file(base_f, 'save_eta_grids', True) utilities.write_parameter_to_file(base_f, 'save_depth_grids', True) utilities.write_parameter_to_file(base_f, 'save_discharge_grids', True) utilities.write_parameter_to_file(base_f, 'save_sandfrac_grids', True) utilities.write_parameter_to_file(base_f, 'save_checkpoint', False) utilities.write_parameter_to_file(base_f, 'resume_checkpoint', True) base_f.close() resumeModel = DeltaModel(input_file=base_p) assert resumeModel.time == baseModel.time # same when resumed # advance it until output_data has been called again nt_resume = 0 while (resumeModel._save_time_since_data != 0) or (nt_resume < 50): resumeModel.update() nt_resume += 1 resumeModel.finalize() assert nt_resume > 0 assert resumeModel.time > baseModel.time # assert that output netCDF4 exists exp_path_nc = os.path.join(tmp_path / 'test', 'pyDeltaRCM_output.nc') assert os.path.isfile(exp_path_nc) # load it into memory and check values in the netCDF4 output = Dataset(exp_path_nc, 'r', allow_pickle=True) out_vars = output.variables.keys() # check that expected variables are in the file assert 'x' in out_vars assert 'y' in out_vars assert 'time' in out_vars assert 'eta' in out_vars assert 'depth' in out_vars assert 'discharge' in out_vars assert 'sandfrac' in out_vars # check attributes of variables assert output['time'][0].tolist() == 0.0 assert output['time'][-1] == resumeModel.time assert output['time'][-1].tolist() == resumeModel._dt * \ (nt_base + nt_var + nt_resume) assert output['eta'][0].shape == resumeModel.eta.shape assert output['eta'][-1].shape == resumeModel.eta.shape assert output['depth'][-1].shape == resumeModel.eta.shape assert output['discharge'][-1].shape == resumeModel.eta.shape assert output['sandfrac'][-1].shape == resumeModel.eta.shape # check the metadata assert output['meta']['L0'][:] == resumeModel.L0 assert output['meta']['N0'][:] == resumeModel.N0 assert output['meta']['CTR'][:] == resumeModel.CTR assert output['meta']['dx'][:] == resumeModel.dx assert output['meta']['h0'][:] == resumeModel.h0 assert np.all(output['meta']['cell_type'][:] == resumeModel.cell_type) assert output['meta']['H_SL'][-1].data == resumeModel.H_SL assert output['meta']['f_bedload'][-1].data == resumeModel.f_bedload C0_from_file = float(output['meta']['C0_percent'][-1].data) assert pytest.approx(C0_from_file) == resumeModel.C0_percent assert output['meta']['u0'][-1].data == resumeModel.u0 # checkpoint interval aligns w/ timestep dt so these should match assert output['time'][-1].tolist() == resumeModel.time
def test_load_checkpoint_with_open_netcdf(self, tmp_path): """Test what happens if output netCDF file is actually open. This is not the same as when the netCDF file is open in another process. That situation raises an error for all OS. """ # define a yaml with outputs (defaults will output strata) p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True, 'save_eta_grids': True}) _delta = DeltaModel(input_file=p) # replace eta with a random field for checkpointing success check _rand_field = np.random.uniform(0, 1, size=_delta.eta.shape) _delta.eta = _rand_field _delta._save_time_since_checkpoint = float("inf") _delta.output_checkpoint() # force another checkpoint _delta.finalize() # paths exists assert os.path.isfile(os.path.join( _delta.prefix, 'pyDeltaRCM_output.nc')) assert os.path.isfile(os.path.join( _delta.prefix, 'checkpoint.npz')) _prefix = _delta.prefix _delta = [] # clear # open the netCDF file _opened = Dataset(os.path.join(_prefix, 'pyDeltaRCM_output.nc'), 'r+', format='NETCDF4') assert type(_opened) == Dataset assert 'eta' in _opened.variables.keys() # saved grid is the initial one, before random field was assigned assert np.all(_opened.variables['eta'][:].data != _rand_field) # can be resumed p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True, 'resume_checkpoint': True, 'save_eta_grids': True}) _delta = DeltaModel(input_file=p) # force save grids/figs _delta.save_grids_and_figs() # check that fields match assert np.all(_delta.eta == _rand_field) # assert that old netCDF object is still around and hasn't changed assert type(_opened) == Dataset assert 'eta' in _opened.variables.keys() # grid from old netCDF is initial one, before random field was assigned assert np.all(_opened.variables['eta'][:].data != _rand_field) # clear delta _delta = [] # open the new netCDF file _new = Dataset(os.path.join(_prefix, 'pyDeltaRCM_output.nc'), 'r+', format='NETCDF4') # first grid should be the OG one assert np.all(_opened['eta'][:].data == _new['eta'][0, :, :].data) # random field should be saved in the new netCDF file # some rounding/truncation happens in the netCDF so we use approx assert pytest.approx(_rand_field) == _new['eta'][1, :, :].data
def test_load_ckpt_wo_netcdf_parallel_spinup_to_matrix(self, tmp_path): """ Test that multiple matrix runs can be resumed from a single checkpoint file, and take advantage of the preprocessor parallel infrastructure. """ # define a yaml with an output and checkpoint p = utilities.yaml_from_dict(tmp_path, 'input.yaml', {'save_checkpoint': True, 'save_eta_grids': True}) baseModel = DeltaModel(input_file=p) # run base for 2 timesteps for _ in range(0, 50): baseModel.update() baseModel.finalize() # get the number of times the data has been output in the base file bmsi = baseModel._save_iter assert bmsi > 0 # check that files exist, and then delete nc assert os.path.isfile(os.path.join( baseModel.prefix, 'pyDeltaRCM_output.nc')) assert os.path.isfile(os.path.join( baseModel.prefix, 'checkpoint.npz')) # open the file and check dimensions exp_nc_path = os.path.join(baseModel.prefix, 'pyDeltaRCM_output.nc') output = Dataset(exp_nc_path, 'r', allow_pickle=True) out_vars = output.variables.keys() # check that expected variables are in the file assert 'x' in out_vars assert 'y' in out_vars assert 'time' in out_vars assert 'eta' in out_vars # check attributes of variables assert output['time'].shape[0] == bmsi ######################## # set up a matrix of runs resume_dict = {'save_checkpoint': False, 'resume_checkpoint': True, 'save_eta_grids': True, 'out_dir': os.path.join(tmp_path, 'matrix'), 'parallel': 4} _matrix = {'f_bedload': [0.1, 0.2, 0.5, 1]} resume_dict['matrix'] = _matrix # let the preprocessor write the initial matrix and # create a new output netcdf file pp = preprocessor.Preprocessor( resume_dict, timesteps=25) # 2000 # now copy the checkpoint for j, j_file in enumerate(pp.file_list): # get folder j_folder = j_file.parent # copy the spinup checkpoint to each of the folders shutil.copy( src=os.path.join(baseModel.prefix, 'checkpoint.npz'), dst=os.path.join(tmp_path, 'matrix', j_folder)) pp.run_jobs() # first job save dimension fjsi = pp.job_list[0].deltamodel._save_iter # proceed with assertions for j, j_job in enumerate(pp.job_list): exp_nc_path = os.path.join( tmp_path, 'matrix', j_folder, 'pyDeltaRCM_output.nc') assert os.path.isfile(exp_nc_path) # check all jobs have same dimensionality jsi = j_job.deltamodel._save_iter assert jsi == fjsi # open the file and check dimensions output = Dataset(exp_nc_path, 'r', allow_pickle=True) out_vars = output.variables.keys() # check that expected variables are in the file assert 'x' in out_vars assert 'y' in out_vars assert 'time' in out_vars assert 'eta' in out_vars # check attributes of variables # this is the critical check, that the dimension of the second # netcdf is not expanded to begin at _save_ter from the initial # basemodel assert output['time'].shape[0] <= (bmsi // 2)
def test_simple_checkpoint(self, tmp_path): """Test checkpoint vs a base run. Also, checks resumed model against another checkpoint run. """ # define a yaml for the longer model run file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') utilities.write_parameter_to_file(base_f, 'save_checkpoint', True) base_f.close() longModel = DeltaModel(input_file=base_p) # run for some number of updates for _ in range(0, 50): longModel.update() longModel.finalize() # try defining a new model but plan to load checkpoint from longModel file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') utilities.write_parameter_to_file(base_f, 'resume_checkpoint', True) base_f.close() resumeModel = DeltaModel(input_file=base_p) # advance the resumed model until it catch up to longModel assert resumeModel.time < longModel.time while resumeModel._time < longModel._time: resumeModel.update() resumeModel.finalize() # the longModel and resumeModel should match assert longModel.time == resumeModel.time assert np.all(longModel.eta == resumeModel.eta) assert np.all(longModel.uw == resumeModel.uw) assert np.all(longModel.ux == resumeModel.ux) assert np.all(longModel.uy == resumeModel.uy) assert np.all(longModel.depth == resumeModel.depth) assert np.all(longModel.stage == resumeModel.stage) assert np.all(longModel.sand_frac == resumeModel.sand_frac) assert np.all(longModel.active_layer == resumeModel.active_layer) # define another model that loads the checkpoint file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') utilities.write_parameter_to_file(base_f, 'resume_checkpoint', True) base_f.close() resumeModel2 = DeltaModel(input_file=base_p) # advance the resumed model until it catch up to longModel while resumeModel2._time < resumeModel._time: resumeModel2.update() resumeModel2.finalize() # the two models that resumed from the checkpoint should be the same assert resumeModel2.time == resumeModel.time assert np.all(resumeModel2.uw == resumeModel.uw) assert np.all(resumeModel2.ux == resumeModel.ux) assert np.all(resumeModel2.uy == resumeModel.uy) assert np.all(resumeModel2.depth == resumeModel.depth) assert np.all(resumeModel2.stage == resumeModel.stage) assert np.all(resumeModel2.sand_frac == resumeModel.sand_frac) assert np.all(resumeModel2.active_layer == resumeModel.active_layer)
def test_checkpoint_diff_dt(self, tmp_path): """Test when checkpoint_dt does not match dt or save_dt.""" # define a yaml for the base model run file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'save_eta_grids', True) utilities.write_parameter_to_file(base_f, 'save_depth_grids', True) utilities.write_parameter_to_file(base_f, 'save_discharge_grids', True) utilities.write_parameter_to_file(base_f, 'save_checkpoint', True) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') base_f.close() baseModel = DeltaModel(input_file=base_p) # modify the checkpoint dt to be different than save_dt baseModel._checkpoint_dt = (baseModel.save_dt * 0.65) for _ in range(0, 50): baseModel.update() baseModel.finalize() assert baseModel.time == baseModel._dt * 50 baseModelSavedTime = (baseModel.time - baseModel._save_time_since_checkpoint) assert baseModelSavedTime > 0 # try defining a new model but plan to load checkpoint from baseModel file_name = 'base_run.yaml' base_p, base_f = utilities.create_temporary_file(tmp_path, file_name) utilities.write_parameter_to_file(base_f, 'save_eta_grids', True) utilities.write_parameter_to_file(base_f, 'save_depth_grids', True) utilities.write_parameter_to_file(base_f, 'save_discharge_grids', True) utilities.write_parameter_to_file(base_f, 'save_checkpoint', False) utilities.write_parameter_to_file(base_f, 'resume_checkpoint', True) utilities.write_parameter_to_file(base_f, 'out_dir', tmp_path / 'test') base_f.close() resumeModel = DeltaModel(input_file=base_p) assert resumeModel.time == baseModelSavedTime # advance until some steps and just saved nt_resume = 0 while (resumeModel._save_time_since_data != 0) or (nt_resume < 50): resumeModel.update() nt_resume += 1 resumeModel.finalize() # assert that output netCDF4 exists exp_path_nc = os.path.join(tmp_path / 'test', 'pyDeltaRCM_output.nc') assert os.path.isfile(exp_path_nc) # load it into memory and check values in the netCDF4 output = Dataset(exp_path_nc, 'r', allow_pickle=True) out_vars = output.variables.keys() # check that expected variables are in the file assert 'x' in out_vars assert 'y' in out_vars assert 'time' in out_vars assert 'eta' in out_vars assert 'depth' in out_vars assert 'discharge' in out_vars # check attributes of variables assert output['time'][0].tolist() == 0.0 assert output['time'][-1].tolist() == resumeModel.time