def func_start_values(*args):
    """Extract start values for one (lat, lon, prob) site.

    The last positional argument is the sequence of variable names; the
    preceding arguments are the matching data arrays.  Returns the start
    values reshaped to (1, 1, 1, 6) for dask/xarray blockwise mapping.
    """
    v_names = args[-1]
    # flatten each per-site array and key it by its variable name
    d = {v_names[i]: args[i].reshape((-1, )) for i in range(len(args[:-1]))}
    print('lat:', d['lat'], 'lon:', d['lon'], 'prob:', d['prob'], flush=True)
    start_values = CARDAMOMlib.load_start_values_greg_dict(d)
    # NOTE(review): removed a stray, unused call to
    # CARDAMOMlib.compute_ds_pwc_mr_fd_greg(ds_single, ...) that referenced
    # the undefined name `ds_single` and would have raised a NameError;
    # compare the clean variant of this function elsewhere in this file.
    return start_values.reshape(1, 1, 1, 6)
def func_pwc_mr_fd(ds_single):
    """Compute the pwc_mr_fd dataset for a single site and log completion."""
    result = CARDAMOMlib.compute_ds_pwc_mr_fd_greg(ds_single, comp_dict)
    write_to_logfile(
        "finished single,",
        "lat:", ds_single.lat.data,
        "lon:", ds_single.lon.data,
        "prob:", ds_single.prob.data,
    )
    return result
def func_pwc_mr_fd(ds_single):
    """Compute the pwc_mr_fd dataset for one site using trapezoidal
    integration with 51 nodes, then log completion."""
    result = CARDAMOMlib.compute_ds_pwc_mr_fd_greg(
        ds_single,
        integration_method='trapezoidal',
        nr_nodes=51,
    )
    write_to_logfile(
        "finished single,",
        "lat:", ds_single.lat.data,
        "lon:", ds_single.lon.data,
        "prob:", ds_single.prob.data,
    )
    return result
def func_us(*args):
    """Load the external input fluxes (us) for one (lat, lon, prob) site.

    The last positional argument holds the variable names; the preceding
    arguments are the matching data arrays.  Returns us reshaped to
    (1, 1, 1, nr_times, 6) for blockwise mapping.
    """
    names = args[-1]
    arrays = args[:-1]
    # flatten each per-site array and key it by its variable name
    d = {name: arr.reshape((-1,)) for name, arr in zip(names, arrays)}
    print('lat:', d['lat'], 'lon:', d['lon'], 'prob:', d['prob'], flush=True)
    us = CARDAMOMlib.load_us_greg_dict(d)
    return us.reshape(1, 1, 1, len(d['time']), 6)
def func_start_values(*args):
    """Load start values for one (lat, lon, prob) site.

    The last positional argument holds the variable names; the preceding
    arguments are the matching data arrays.  Returns the start values
    reshaped to (1, 1, 1, nr_pools) for blockwise mapping.
    """
    names = args[-1]
    arrays = args[:-1]
    # flatten each per-site array and key it by its variable name
    d = {name: arr.reshape((-1,)) for name, arr in zip(names, arrays)}
    print('lat:', d['lat'], 'lon:', d['lon'], 'prob:', d['prob'], flush=True)
    start_values = CARDAMOMlib.load_start_values_greg_dict(d)
    return start_values.reshape(1, 1, 1, nr_pools)
def func_Bs(*args):
    """Load the compartmental matrices (Bs) for one (lat, lon, prob) site.

    Uses trapezoidal integration with 51 nodes; logs completion and
    returns Bs reshaped to (1, 1, 1, nr_times, 6, 6).
    """
    names = args[-1]
    arrays = args[:-1]
    # flatten each per-site array and key it by its variable name
    d = {name: arr.reshape((-1,)) for name, arr in zip(names, arrays)}
    Bs = CARDAMOMlib.load_Bs_greg_dict(
        d,
        integration_method="trapezoidal",
        nr_nodes=51,
    )
    write_to_logfile(
        "finished single,",
        "lat:", d["lat"],
        "lon:", d["lon"],
        "prob:", d["prob"],
    )
    return Bs.reshape(1, 1, 1, len(d['time']), 6, 6)
def func_pwc_mr_fd(ds_single):
    """Compute the pwc_mr_fd dataset for one site with warnings silenced.

    Uses trapezoidal integration with 151 nodes; the module-level `errors`
    flag is forwarded to the computation.  Logs completion afterwards.
    """
    with warnings.catch_warnings():
        # the underlying solver is noisy; suppress warnings locally
        warnings.simplefilter("ignore")
        ds_res = CARDAMOMlib.compute_ds_pwc_mr_fd_greg(
            ds_single,
            integration_method='trapezoidal',
            nr_nodes=151,
            errors=errors,
        )
        write_to_logfile(
            "finished single,",
            "lat:", ds_single.lat.data,
            "lon:", ds_single.lon.data,
            "prob:", ds_single.prob.data,
        )
        return ds_res
def func_data_consistency(ds_single):
    """Check data consistency for one site and return the errors as a Dataset.

    Returns a Dataset with 'abs_err' and 'rel_err' DataArrays; masked
    values are filled with NaN.  Closes the input dataset before returning.
    """
    mdo = CARDAMOMlib.load_mdo_greg(ds_single)
    abs_err, rel_err = mdo.check_data_consistency()

    def _as_data_array(err):
        # masked entries become NaN; 'units' attr is ignored by map_blocks
        return xr.DataArray(
            data=err.data.filled(fill_value=np.nan),
            attrs={'units': err.unit}
        )

    ds_res = xr.Dataset(
        data_vars={
            'abs_err': _as_data_array(abs_err),
            'rel_err': _as_data_array(rel_err),
        }
    )
    ds_single.close()
    return ds_res
warnings.simplefilter("ignore")

# +
data_path = Path("/home/data/CARDAMOM/Greg_2020_10_26/")
netCDF_filestem = "sol_acc_age_btt"

# (time_resolution, delay_in_months, model_type)
data_combinations = [
    ("monthly", None, "discrete"),
    ("monthly", None, "continuous"),
]

datasets = dict()
for dc in data_combinations:
    time_resolution, delay_in_months, model_type = dc
    params = CARDAMOMlib.load_params(time_resolution, delay_in_months)
    # NOTE(review): the original nested data_path.joinpath twice; since the
    # inner result is already absolute, the outer joinpath was a no-op --
    # simplified to a single joinpath (identical resulting path).
    output_path = data_path.joinpath(params["output_folder"])
    project_path = output_path.joinpath(model_type)
    ds_path = project_path.joinpath(netCDF_filestem)
    print(dc, ds_path)
    datasets[dc] = xr.open_mfdataset(str(ds_path) + "*.nc")
# -

# restrict both model variants to the first ensemble member
ds1 = datasets[('monthly', None, 'discrete')].isel(prob=[0])
ds2 = datasets[('monthly', None, 'continuous')].isel(prob=[0])

# +
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 18))
var_names = ["xs", "solution"]
#variable_paths = [p for p in zarr_path.iterdir() if p.is_dir()] # #variable_names = [] #variables = [] #for variable_path in variable_paths: # variable_names.append(variable_path.name) # variables.append(da.from_zarr(str(variable_path))) # + #ds = xr.merge(dss) #ds = xr.open_zarr(data_folder + filestem + "zarr_version/26_78.00_95.00") # #ds # + ms = CARDAMOMlib.load_model_structure_greg() def make_fake_ds(dataset): # fake start_values data fake_data_sv = np.zeros( (len(dataset.lat), len(dataset.lon), len(dataset.prob), ms.nr_pools)) coords_pool = [d['pool_name'] for d in ms.pool_structure] fake_coords_sv = { 'lat': dataset.lat.data, 'lon': dataset.lon.data, 'prob': dataset.prob.data, 'pool': coords_pool }
def func(single_site_ds):
    """Thin wrapper: compute the pwc_mr_fd dataset for a single site."""
    return CARDAMOMlib.compute_pwc_mr_fd_ds(single_site_ds)
def func_Rs(*args):
    """Load the release fluxes (Rs) for one (lat, lon, prob) site.

    The last positional argument holds the variable names; the preceding
    arguments are the matching data arrays.  Returns Rs reshaped to
    (1, 1, 1, nr_times, nr_pools) for blockwise mapping.
    """
    names = args[-1]
    arrays = args[:-1]
    # flatten each per-site array and key it by its variable name
    d = {name: arr.reshape((-1,)) for name, arr in zip(names, arrays)}
    Rs = CARDAMOMlib.load_Rs_greg_dict(d)
    return Rs.reshape(1, 1, 1, nr_times, nr_pools)
def compute_pwc_mr_fd_for_one_prob(prob_nr):
    """Compute the pwc_mr_fd datasets for one ensemble member (`prob_nr`)
    over the whole (lat, lon) grid and write the result to a netCDF file.

    Progress is appended to a per-prob logfile.  The heavy lifting is
    delegated to xr.map_blocks over (1, 1, 1)-sized chunks, with a
    template ("fake") dataset describing the output structure.
    """
    data_folder = "/home/data/CARDAMOM/"  # matagorda, antakya
    filestem = "Greg_2020_10_26/"
    output_folder = "output/"
    #pwc_mr_fd_archive = data_folder + output_folder + 'pwc_mr_fd/'
    logfilename = data_folder + filestem + output_folder + "pwc_mr_fd_%04d.log" % prob_nr

    # ds = xr.open_mfdataset(data_folder + filestem + "SUM*.nc")
    ds = xr.open_dataset(data_folder + filestem + "small_netcdf/" + "rechunked.nc")
    #ds

    # In[4]:

    # model structure of the CARDAMOM carbon pools (names, number of pools)
    ms = CARDAMOMlib.load_model_structure_greg()

    def make_fake_ds(dataset):
        # Build a template dataset mirroring the structure of the output of
        # func_chunk, as required by xr.map_blocks(template=...).

        # fake start_values data: zeros, shape (lat, lon, prob, pool)
        fake_data_sv = np.zeros((len(dataset.lat),
                                 len(dataset.lon),
                                 len(dataset.prob),
                                 ms.nr_pools))
        coords_pool = [d['pool_name'] for d in ms.pool_structure]
        fake_coords_sv = {
            'lat': dataset.lat.data,
            'lon': dataset.lon.data,
            'prob': dataset.prob.data,
            'pool': coords_pool
        }
        fake_array_sv = xr.DataArray(data=fake_data_sv,
                                     dims=['lat', 'lon', 'prob', 'pool'],
                                     coords=fake_coords_sv)

        # fake times data: zeros, shape (lat, lon, prob, time)
        fake_data_times = np.zeros((len(dataset.lat),
                                    len(dataset.lon),
                                    len(dataset.prob),
                                    len(dataset.time)))
        fake_coords_times = {
            'lat': dataset.lat.data,
            'lon': dataset.lon.data,
            'prob': dataset.prob.data,
            'time': dataset.time.data
        }
        fake_array_times = xr.DataArray(data=fake_data_times,
                                        dims=['lat', 'lon', 'prob', 'time'],
                                        coords=fake_coords_times)

        # fake us data: zeros, shape (lat, lon, prob, time, pool)
        fake_data_us = np.zeros(
            (len(dataset.lat), len(dataset.lon), len(dataset.prob),
             len(dataset.time), ms.nr_pools))
        fake_coords_us = {
            'lat': dataset.lat.data,
            'lon': dataset.lon.data,
            'prob': dataset.prob.data,
            'time': dataset.time.data,
            'pool': coords_pool
        }
        fake_array_us = xr.DataArray(
            data=fake_data_us,
            dims=['lat', 'lon', 'prob', 'time', 'pool'],
            coords=fake_coords_us)

        # fake Bs data: zeros, shape (lat, lon, prob, time, pool_to, pool_from)
        fake_data_Bs = np.zeros(
            (len(dataset.lat), len(dataset.lon), len(dataset.prob),
             len(dataset.time), ms.nr_pools, ms.nr_pools))
        fake_coords_Bs = {
            'lat': dataset.lat.data,
            'lon': dataset.lon.data,
            'prob': dataset.prob.data,
            'time': dataset.time.data,
            'pool_to': coords_pool,
            'pool_from': coords_pool
        }
        fake_array_Bs = xr.DataArray(
            data=fake_data_Bs,
            dims=['lat', 'lon', 'prob', 'time', 'pool_to', 'pool_from'],
            coords=fake_coords_Bs)

        # fake log data: fixed-width strings, one per (lat, lon, prob)
        shape = (
            len(dataset.lat),
            len(dataset.lon),
            len(dataset.prob),
        )
        fake_data_log = np.ndarray(shape, dtype="<U150")
        fake_coords_log = {
            'lat': dataset.lat.data,
            'lon': dataset.lon.data,
            'prob': dataset.prob.data
        }
        fake_array_log = xr.DataArray(data=fake_data_log,
                                      dims=['lat', 'lon', 'prob'],
                                      coords=fake_coords_log)

        # collect fake arrays in ds
        fake_data_vars = dict()
        fake_data_vars['start_values'] = fake_array_sv
        fake_data_vars['times'] = fake_array_times
        fake_data_vars['us'] = fake_array_us
        fake_data_vars['Bs'] = fake_array_Bs
        fake_data_vars['log'] = fake_array_log

        fake_coords = {
            'lat': dataset.lat.data,
            'lon': dataset.lon.data,
            'prob': dataset.prob.data,
            'time': dataset.time.data,
            'pool': coords_pool,
            'pool_to': coords_pool,
            'pool_from': coords_pool
        }

        fake_ds = xr.Dataset(data_vars=fake_data_vars, coords=fake_coords)
        return fake_ds

    # In[5]:

    # one chunk per single (lat, lon, prob) site
    chunk_dict = {"lat": 1, "lon": 1, 'prob': 1}
    #sub_chunk_dict = {'lat': 1, 'lon': 1, 'prob': 1}
    comp_dict = {'zlib': True, 'complevel': 9}

    # full grid, single ensemble member prob_nr
    ds_sub = ds.isel(
        lat=slice(0, 34, 1),  # 0-33
        lon=slice(0, 71, 1),  # 0-70
        prob=slice(prob_nr, prob_nr + 1, 1)  # 0-0
    ).chunk(chunk_dict)
    #ds_sub = ds.isel(
    #    lat=slice(28, 30, 1),
    #    lon=slice(38, 40, 1),
    #    prob=slice(0, 20, 1)
    #).chunk(chunk_dict)
    #ds_sub = ds.chunk(chunk_dict)
    #ds_sub

    # In[6]:

    def write_to_logfile(*args):
        # append a timestamped line (HH:MM:SS + args) to the per-prob logfile
        t = time.localtime()
        current_time = time.strftime("%H:%M:%S", t)
        with open(logfilename, 'a') as f:
            t = (current_time, ) + args
            f.write(" ".join([str(s) for s in t]) + '\n')

    # there is no multi-dimensional 'groupby' in xarray data structures,
    # so recurse: group by the first dimension, then by the remaining ones
    def nested_groupby_apply(dataset, groupby, apply_fn, **kwargs):
        if len(groupby) == 1:
            res = dataset.groupby(groupby[0]).apply(apply_fn, **kwargs)
            return res
        else:
            return dataset.groupby(groupby[0]).apply(nested_groupby_apply,
                                                     groupby=groupby[1:],
                                                     apply_fn=apply_fn,
                                                     **kwargs)

    def func_pwc_mr_fd(ds_single):
        # compute the pwc_mr_fd dataset for a single site and log completion
        # print(ds_single)
        ds_res = CARDAMOMlib.compute_ds_pwc_mr_fd_greg(ds_single, comp_dict)
        write_to_logfile("finished single,",
                         "lat:", ds_single.lat.data,
                         "lon:", ds_single.lon.data,
                         "prob:", ds_single.prob.data)
        return ds_res

    def func_chunk(chunk_ds):
        # apply func_pwc_mr_fd to every single site in this chunk
        # print('func_chunk', chunk_ds.lat.data, chunk_ds.lon.data)

        # worker = get_worker()
        # worker.memory_target_fraction = 0.95
        # worker.memory_spill_fraction = False
        # worker.memory_pause_fraction = False
        # worker.memory_terminate_fraction = False
        # print(worker.memory_target_fraction, flush=True)
        # print(worker.memory_spill_fraction, flush=True)
        # print(worker.memory_pause_fraction, flush=True)
        # print(worker.memory_terminate_fraction, flush=True)
        # print('chunk started:', chunk_ds.lat[0].data, chunk_ds.lon[0].data, flush=True)

        res_ds = nested_groupby_apply(chunk_ds, ['lat', 'lon', 'prob'], func_pwc_mr_fd)

        # group_by removes the dimensions mentioned, so the resulting ds is
        # lower dimensional, unfortunatley, map_blocks does not do that and so
        # putting the sub result datasets back together becomes technically difficult
        # chunk_fake_ds = make_fake_ds(chunk_ds).chunk(sub_chunk_dict)
        # sub_chunk_ds = chunk_ds.chunk(sub_chunk_dict)
        # res_ds = xr.map_blocks(func_pwc_mr_fd, sub_chunk_ds, template=chunk_fake_ds)

        print('chunk finished:', chunk_ds.lat[0].data, chunk_ds.lon[0].data, chunk_ds.prob[0].data, flush=True)
        # write_to_logfile(
        #     'chunk finished,',
        #     "lat:", chunk_ds.lat[0].data,
        #     "lon:", chunk_ds.lon[0].data,
        #     "prob:", chunk_ds.prob[0].data
        # )

        return res_ds

    # In[7]:

    # lazily set up the whole computation; executed on to_netcdf below
    fake_ds = make_fake_ds(ds_sub).chunk(chunk_dict)
    ds_pwc_mr_fd = xr.map_blocks(func_chunk, ds_sub, template=fake_ds)

    # In[ ]:

    c = ds_sub.chunks
    nr_chunks = np.prod([len(val) for val in c.values()])
    nr_singles = len(ds_sub.lat) * len(ds_sub.lon) * len(ds_sub.prob)
    write_to_logfile('starting:', nr_chunks, "chunks, ", nr_singles, "singles")

    # triggers the actual (distributed) computation
    ds_pwc_mr_fd.to_netcdf(
        data_folder + filestem + output_folder + "pwc_mr_fd_%04d.nc" % prob_nr,
        compute=True)
    write_to_logfile('done')

    # In[ ]:

    # release all handles / references to keep worker memory bounded
    ds.close()
    del ds
    ds_sub.close()
    del ds_sub
    ds_pwc_mr_fd.close()
    del ds_pwc_mr_fd
# + import zarr import shutil import numpy as np import dask.array as da from pathlib import Path from bgc_md2.models.CARDAMOM import CARDAMOMlib from bgc_md2.notebookHelpers import write_to_logfile, custom_timeout from dask.distributed import Client # - my_cluster = CARDAMOMlib.prepare_cluster(n_workers=48) Client(my_cluster) # ## How to connect to remote # **Remark**: Port values to be adapted, see above. # # ### remotely # ` # screen # # cd GitHub/bgc_md2/notebooks/CARDAMOM # conda activate bgc_md2 # jupyter lab --no-browser -- port=8790 # ` # ### locally # ` # ssh -L 8080:localhost:8790 antakya_from_home
import dask.array as da
import numpy as np
import pandas as pd
import xarray as xr

from pathlib import Path
from tqdm import tqdm

from bgc_md2.models.CARDAMOM import CARDAMOMlib

from dask.distributed import Client
from dask import delayed
# -

# for monthly discrete data, each worker needs about 10GB
my_cluster = CARDAMOMlib.prepare_cluster(n_workers=1, alternative_dashboard_port=8790)
Client(my_cluster)

# ## How to connect to remote
# **Remark**: Port values to be adapted, see above.
#
# ### remotely
# `
# screen
# # cd GitHub/bgc_md2/notebooks/CARDAMOM
# conda activate bgc_md2
# jupyter lab --no-browser --port=8890
# `
# ### locally
# `
# ssh -L 8080:localhost:8890 antakya_from_home
# input paths for radiocarbon (Delta14C) reference curves
CARDAMOM_path = Path("/home/data/CARDAMOM/")
intcal20_path = CARDAMOM_path.joinpath("IntCal20_Year_Delta14C.csv")
Delta14C_atm_path = CARDAMOM_path.joinpath("Delta_14C_NH.csv")

# (time_resolution, delay_in_months, model_type)
data_combinations = [
    ("monthly", None, "discrete"),
    ("monthly", None, "continuous"),  # only first prob computed so far
    ("yearly", 0, "continuous"),
    ("yearly", 6, "continuous")
]

datasets = dict()
for dc in data_combinations:
    time_resolution, delay_in_months, model_type = dc
    params = CARDAMOMlib.load_params(time_resolution, delay_in_months)
    # NOTE(review): `data_path` and `netCDF_filestem` are not defined in
    # this cell -- presumably defined in an earlier cell of the notebook;
    # verify before running this cell in isolation.
    output_path = data_path.joinpath(
        data_path.joinpath(params["output_folder"]))
    project_path = output_path.joinpath(model_type)
    ds_path = project_path.joinpath(netCDF_filestem)
    print(dc, ds_path)
    datasets[dc] = xr.open_mfdataset(str(ds_path) + "*.nc")
# -

ds_m = datasets[("monthly", None, "continuous")]
ds_y0 = datasets[("yearly", 0, "continuous")]
ds_y6 = datasets[("yearly", 6, "continuous")]
ds_dmr = datasets[("monthly", None, "discrete")]

# choose a site in northern Sweden, ensemble member prob=0
import matplotlib.pyplot as plt

from pathlib import Path
from tqdm import tqdm

from bgc_md2.models.CARDAMOM import CARDAMOMlib
from bgc_md2.models.ELM import ELMlib_no_vr
from bgc_md2.notebook_helpers import nested_groupby_apply

from dask.distributed import Client

# #%autoreload 2
# -

# start a local dask cluster with 24 workers and attach a client to it
my_cluster = CARDAMOMlib.prepare_cluster(n_workers=24)#, my_user_name="hmetzler")
Client(my_cluster)

# ## How to connect to remote
# **Remark**: Port values to be adapted, see above.
#
# ### remotely
# `
# screen
# # cd GitHub/bgc_md2/notebooks/CARDAMOM
# conda activate bgc_md2
# jupyter lab --no-browser --port=8890
# `
# ### locally
# `
# ssh -L 8080:localhost:8890 antakya_from_home
netCDF_filestem = "sol_acc_age_btt"

# input path for the radiocarbon (Delta14C) reference curve
CARDAMOM_path = Path("/home/data/CARDAMOM/")
intcal20_path = CARDAMOM_path.joinpath("IntCal20_Year_Delta14C.csv")

# (time_resolution, delay_in_months, model_type)
data_combinations = [
    ("monthly", None, "discrete"),
    ("monthly", None, "continuous"),  # only first prob computed so far
    ("yearly", 0, "continuous"),
    ("yearly", 6, "continuous")
]

datasets = dict()
for dc in data_combinations:
    time_resolution, delay_in_months, model_type = dc
    params = CARDAMOMlib.load_params(time_resolution, delay_in_months)
    # NOTE(review): `data_path` is not defined in this cell -- presumably
    # defined in an earlier cell of the notebook; verify before running
    # this cell in isolation.
    output_path = data_path.joinpath(
        data_path.joinpath(params["output_folder"]))
    project_path = output_path.joinpath(model_type)
    ds_path = project_path.joinpath(netCDF_filestem)
    print(dc, ds_path)
    datasets[dc] = xr.open_mfdataset(str(ds_path) + "*.nc")
# -

#ds = datasets[("monthly", None, "discrete")]
ds = datasets[("monthly", None, "continuous")]
ds

# choose a site in northern Sweden, ensemble member prob=0
(lat, lon, prob) = (28, 52, 0)
# + import shutil import dask.array as da import xarray as xr from pathlib import Path from tqdm import tqdm from bgc_md2.models.CARDAMOM import CARDAMOMlib from dask.distributed import Client # - my_cluster = CARDAMOMlib.prepare_cluster( n_workers=12, # alternative_dashboard_port=8792 ) Client(my_cluster) # ## How to connect to remote # **Remark**: Port values to be adapted, see above. # # ### remotely # ` # screen # # cd GitHub/bgc_md2/notebooks/CARDAMOM # conda activate bgc_md2 # jupyter lab --no-browser -- port=8890 # ` # ### locally # `
import zarr

import dask.array as da
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

from CompartmentalSystems.pwc_model_run_fd import PWCModelRunFD
from bgc_md2.models.CARDAMOM import CARDAMOMlib

from dask.distributed import Client
# -

# start a local dask cluster with 48 workers and attach a client to it
my_cluster = CARDAMOMlib.prepare_cluster(n_workers=48)
Client(my_cluster)

# ## How to connect to remote
# **Remark**: Port values to be adapted, see above.
#
# ### remotely
# `
# screen
# # cd GitHub/bgc_md2/notebooks/CARDAMOM
# conda activate bgc_md2
# jupyter lab --no-browser --port=8890
# `
# ### locally
# `
# ssh -L 8080:localhost:8890 antakya_from_home
import zarr

import dask.array as da
import numpy as np
import xarray as xr

from dask import delayed
from pathlib import Path
from tqdm import tqdm

from bgc_md2.notebook_helpers import load_zarr_archive
from bgc_md2.models.CARDAMOM import CARDAMOMlib

from dask.distributed import Client
# -

# start a local dask cluster with 48 workers and attach a client to it
my_cluster = CARDAMOMlib.prepare_cluster(n_workers=48, alternative_dashboard_port=8791)
Client(my_cluster)

# source: monthly zarr archive; target: daily zarr archive
data_path = Path("/home/data/CARDAMOM/Greg_2020_10_26/")
source_path = data_path.joinpath("monthly_rechunked_zarr")
target_path = data_path.joinpath("daily_rechunked_zarr")

ds = xr.open_mfdataset(str(data_path) + "/SUM*.nc")
ds

# carbon stock variables to convert
stock_variable_names = [
    "c_finelitter",
    "c_foliar",
    "c_labile",
    "c_root",
    "c_som",
    "c_wood"
]

# assumes a fixed 31-day month for the monthly->daily conversion -- TODO confirm
days_per_month = 31

# NOTE(review): the body of this function is not visible in this chunk;
# it presumably continues in a part of the file not shown here.
def compute_target(variable_name, z_target_sliced, z_source_sliced):
import dask.array as da
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

from CompartmentalSystems.discrete_model_run import DiscreteModelRun as DMR
from bgc_md2.models.CARDAMOM import CARDAMOMlib

from dask.distributed import Client
# -

# start a local dask cluster with 48 workers and attach a client to it
my_cluster = CARDAMOMlib.prepare_cluster(n_workers=48,
#    alternative_dashboard_port=8791
)
Client(my_cluster)

# ## How to connect to remote
# **Remark**: Port values to be adapted, see above.
#
# ### remotely
# `
# screen
# # cd GitHub/bgc_md2/notebooks/CARDAMOM
# conda activate bgc_md2
# jupyter lab --no-browser --port=8890
# `
# ### locally
# `
# + import xarray as xr import numpy as np import matplotlib.pyplot as plt from pathlib import Path from tqdm import tqdm from bgc_md2.models.CARDAMOM import CARDAMOMlib from bgc_md2.notebook_helpers import nested_groupby_apply from dask.distributed import Client # - my_cluster = CARDAMOMlib.prepare_cluster(n_workers=12) Client(my_cluster) # ## How to connect to remote # **Remark**: Port values to be adapted, see above. # # ### remotely # ` # tmux # # cd GitHub/bgc_md2/notebooks/CARDAMOM # conda activate bgc_md2 # jupyter lab --no-browser -- port=8890 # ` # ### locally # ` # ssh -L 8080:localhost:8890 antakya_from_home
# + import zarr import shutil import numpy as np from pathlib import Path from bgc_md2.models.CARDAMOM import CARDAMOMlib from bgc_md2.notebook_helpers import (write_to_logfile, load_zarr_archive) from dask.distributed import Client # - my_cluster = CARDAMOMlib.prepare_cluster(n_workers=48) Client(my_cluster) # ## How to connect to remote # **Remark**: Port values to be adapted, see above. # # ### remotely # ` # screen # # cd GitHub/bgc_md2/notebooks/CARDAMOM # conda activate bgc_md2 # jupyter lab --no-browser -- port=8890 # ` # ### locally # ` # ssh -L 8080:localhost:8890 antakya_from_home
# # Convert CARDAMOM netcdf data to yearly time steps # # This notebook loads the CARDAMOM netcdf data files and saves them as a single netcdf file in yearly time steps. # + import xarray as xr from pathlib import Path from tqdm import tqdm from bgc_md2.models.CARDAMOM import CARDAMOMlib from dask.distributed import Client # - my_cluster = CARDAMOMlib.prepare_cluster(n_workers=48) Client(my_cluster) nr_months = 12 # coarseness delay_in_months = 0 #delay_in_months = 6 # + data_path = Path("/home/data/CARDAMOM/Greg_2020_10_26/") target_path = data_path.joinpath("yearly_%02d_ds.nc" % delay_in_months) print("target_path", str(target_path)) ds = xr.open_mfdataset(str(data_path) + "/SUM*.nc") ds # -