def setUp(self):
    """Store the data paths, index slices and the shared keyword dict on self."""
    # Directory holding the output of the example run, and the cache directory.
    self.cable_out_path = Path(
        '/home/data/cable-data/example_runs/parallel_1901_2004_with_spinup/output/new4'
    )
    self.zarr_cache_path = Path("cache")

    # Deliberately small index windows.
    self.time_slice = slice(0, 100)
    self.landpoint_slice = slice(0, 4)

    self.cable_data_set = cable_ds(self.cable_out_path)

    # Keyword arguments collected once so the tests can pass them on as **kwargs.
    self.kwargs = dict(
        cable_data_set=self.cable_data_set,
        zarr_cache_path=self.zarr_cache_path,
        landpoint_slice=self.landpoint_slice,
        time_slice=self.time_slice,
        batch_size=32,
    )


def check_presence(self, expected_names):
    """Assert that the cache directory contains exactly ``expected_names``.

    The comparison is done on frozensets of entry names, so neither order nor
    duplicates in ``expected_names`` matter.
    """
    found = frozenset(entry.name for entry in self.zarr_cache_path.iterdir())
    self.assertEqual(found, frozenset(expected_names))


def time_dict(self):
    """Return ``time_dict(self.zarr_cache_path)``.

    The call goes to whatever ``time_dict`` resolves to in the enclosing
    module scope; method names are not visible as bare names inside a class.
    """
    cache_dir = self.zarr_cache_path
    return time_dict(cache_dir)
"/home/data/cable-data/example_runs/parallel_1901_2004_with_spinup/output/new4" ) time_slice = slice(0, 2000) landpoint_slice = slice(0, 2028) # landpoint_slice = slice(None,None) # time_slice=slice(None,None,None) zarr_cache_path = cP.slice_dir_path( cable_out_path, sub_dir_trunk="zarr_mm11", # sub_dir_trunk='zarr_mm', time_slice=time_slice, landpoint_slice=landpoint_slice, ) if "cable_data_set" not in dir(): cable_data_set = cH.cable_ds(cable_out_path) args = { "cable_data_set": cable_data_set, "zarr_cache_path": zarr_cache_path, "landpoint_slice": landpoint_slice, "time_slice": time_slice, "batch_size": 64, #'batch_size': 8, #'rm': True } sol_org_iveg = cH.cacheWrapper(cC.sol_org_iveg, **args) sol_org_iveg # x_val = cH.cacheWrapper( # cC.x_val,
# Identify the example run and the range of years covered by its output files.
runId = "parallel_1901_2004_with_spinup"
outDir = "output/new4"
first_yr = 1901
last_yr = 2004

# One netCDF file name per year, first_yr..last_yr inclusive.
fns = [f"out_ncar_{yr}_ndep.nc" for yr in range(first_yr, last_yr + 1)]
outpath = Path(cableDataDir).joinpath(runId, outDir)
ps = [outpath.joinpath(fn) for fn in fns]

# just have a peek at the first file
dat0 = xr.open_dataset(ps[0])
dat0

# assemble all files into one dataset
ds = cH.cable_ds(outpath, first_yr, last_yr)

# Call `var` once per pool name. The tuple below uses the same names as bare
# identifiers, so `var` is presumably sympy's `var`, which injects the symbol
# into the namespace -- TODO confirm against the imports of this file.
for s in (
    'leaf',
    'wood',
    'fine_root',
    'metabolic_lit',
    'structural_lit',
    'cwd',
    'fast_soil',
    'slow_soil',
    'passive_soil',
):
    var(s)

stateVariableTuple = (
    leaf,
    fine_root,
    wood,
    metabolic_lit,
    structural_lit,
    cwd,
    fast_soil,
    slow_soil,
    passive_soil,
)
npool = len(stateVariableTuple)

# `Dataset.sizes` is the name -> length mapping; indexing `Dataset.dims` for a
# length is deprecated in xarray because `dims` will become a set of names.
npatch = ds.sizes['patch']
nland = ds.sizes['land']

# Every 5000th time step of fromLeaftoL at land point 100: first per patch
# (plotted), then averaged over the patches.
ds.fromLeaftoL.sel(litter_casa_pools=[2], land=100)[0:-1:5000].plot(hue='patch')
ds.fromLeaftoL.sel(litter_casa_pools=2, land=100).mean('patch')[0:-1:5000]

post_processing_dir = "../src/bgc_md2/models/cable_all/cable_transit_time/postprocessing/scripts_org"
dfac = xr.open_dataset(Path(post_processing_dir).joinpath('outAC.nc'))
def test_numeric_input_tuple(self):
    """Extend the symbolic model with numeric data and plot cached pool data.

    Steps:
      1. Build (or reuse) the zarr cache for a slice of the example run and
         fetch ``x_org_iveg`` and ``time`` through ``cH.cacheWrapper``.
      2. Pick the first (patch, landpoint) pair returned by
         ``cC.all_pools_vary_cond_nz``.
      3. Add a numeric parameterization, start values and simulation times to
         the provided values of ``self.mvs`` and ask the extended ``MVarSet``
         for the state vector, the input tuple, the simulation times and the
         numeric solution array.
      4. Plot the first 100 cached time steps of every pool for the chosen
         pair and write the figure to ``solution.pdf``.

    The method contains no assertions; it passes when nothing raises.
    """
    # setup the paths to the testdata
    cable_out_path = Path(
        '/home/data/cable-data/example_runs/parallel_1901_2004_with_spinup/output/new4'
    )
    time_slice = slice(0, None)
    landpoint_slice = slice(1590, 1637)  # cheated
    zarr_cache_path = cP.slice_dir_path(
        cable_out_path,
        sub_dir_trunk="zarr_mm11",
        time_slice=time_slice,
        landpoint_slice=landpoint_slice,
    )

    # The former guard `"cable_data_set" not in dir()` was always true here:
    # inside a function `dir()` lists the local names and `cable_data_set`
    # had not been assigned yet. The dataset is therefore loaded directly.
    cable_data_set = cH.cable_ds(cable_out_path)
    args = {
        "cable_data_set": cable_data_set,
        "zarr_cache_path": zarr_cache_path,
        "landpoint_slice": landpoint_slice,
        "time_slice": time_slice,
        "batch_size": 12,
    }
    x_org_iveg = cH.cacheWrapper(cC.x_org_iveg, **args)
    time = cH.cacheWrapper(cC.time, **args)

    patches, landpoints = cC.all_pools_vary_cond_nz(**args)
    pcs = patches.compute()
    lpcs = landpoints.compute()

    p = Path('plots')
    p.mkdir(exist_ok=True)

    # first pair that has a nonconstant solution
    ind = 0
    lp = lpcs[ind]
    # Fixed: the patch index comes from the patch array, not from the
    # landpoint array (it used to be `lpcs[ind]`, a copy of the line above).
    patch = pcs[ind]

    # get the symbolic representation from the database
    mvs = self.mvs

    # define some stuff to extend it with
    def default(t):
        return 1

    leaf = Symbol('leaf')
    fine_root = Symbol('fine_root')
    Npp = Function("Npp")
    bvec_leaf = Function("bvec_leaf")
    bvec_fine_root = Function("bvec_fine_root")
    xk_leaf_cold = Function("xk_leaf_cold")
    xk_leaf_dry = Function("xk_leaf_dry")
    kleaf = Function("kleaf")
    kfroot = Function("kfroot")

    # Every symbolic function is mapped to the same constant stand-in.
    np1 = NumericParameterization(
        par_dict={},
        func_dict=frozendict({
            Npp: default,
            bvec_fine_root: default,
            bvec_leaf: default,
            xk_leaf_cold: default,
            kleaf: default,
            kfroot: default,
            xk_leaf_dry: default,
        }),
    )
    nsv1 = NumericStartValueDict({leaf: 0.3, fine_root: 3.96})
    ntimes1 = NumericSimulationTimes(np.linspace(0, 1, 11))

    # extend the symbolic version with the new stuff
    pvs = mvs.provided_mvar_values
    pvs1 = pvs.union(frozenset({np1, nsv1, ntimes1}))
    mvs1 = MVarSet(pvs1)

    x = mvs1.get_StateVariableTuple()
    # The next two results are not used below; the calls are kept because
    # this test exercises that the extended set can provide them.
    Input = mvs1.get_InputTuple()
    sym_times = mvs1.get_NumericSimulationTimes()
    sol_smooth = mvs1.get_NumericSolutionArray()

    comp_slice = slice(0, 100)
    n = sol_smooth.shape[1]
    fontsize = 10
    fig = plt.figure()
    for pool in range(n):
        # Axes occupy positions 2..n+1 of an (n+1)-row grid; row 1 stays empty.
        ax = fig.add_subplot(n + 1, 1, 2 + pool)
        # Raw strings keep the exact characters of the former "\$" literal
        # without relying on an invalid escape sequence.
        # NOTE(review): matplotlib renders an escaped dollar literally; if math
        # mode was intended the delimiters should be plain "$" -- confirm.
        title = r"\$" + latex(x[pool]) + r"\$"
        # The symbolic solution (sym_times, sol_smooth) is intentionally not
        # plotted at the moment; only the cached data are drawn.
        ax.plot(
            time[comp_slice],
            x_org_iveg[comp_slice, pool, patch, lp],
            color='b',
        )
        ax.set_title(title, fontsize=fontsize)
    fig.savefig('solution.pdf')
import xarray as xr import numpy as np import dask.array as da from bgc_md2.sitespecificHelpers import get_client from bgc_md2.models.cable_all.cableHelpers import cable_ds from dask.distributed import Client #client = Client(scheduler_file='/home/mm/scheduler.json') client = get_client() print(client) ds = cable_ds() dd =ds.dims npool = dd['plant_casa_pools']+dd['litter_casa_pools']+dd['soil_casa_pool'] npatch = dd['patch'] nland = dd['land'] ntime = dd['time'] # npool = 9 # npatch = 10 # nland = 500 # ntime = 300 A_chunk_shape = (npool,npool,npatch ,1) A_d = da.stack( [ np.stack( [np.eye(npool) for i in range(npatch)], axis=2 ) for j in range(nland) ], axis=3
# +
#cluster=getCluster()
#client=Client(cluster)

# +
#alternatively
client = get_client()
# -

# now lets repeat this exercise for the whole multifile dataset
example_run_dir = '/home/data/cable-data/example_runs/parallel_1901_2004_with_spinup/'
outDir = "output/new4"
outpath = Path(example_run_dir).joinpath(outDir)
ds = cH.cable_ds(outpath)

# `Dataset.sizes` is the name -> length mapping; indexing `Dataset.dims` for a
# length is deprecated in xarray because `dims` will become a set of names.
dd = ds.sizes
# Total pool count: plant, soil and litter pools together.
npool = sum(
    dd[k]
    for k in ('plant_casa_pools', 'soil_casa_pool', 'litter_casa_pools')
)
npatch = dd['patch']
nland = dd['land']
npool, npatch, nland
dd

# +
# build C
# 1.) similar to the ncl script
# Replace the fill value `ifv` in iveg by 18, then subtract 1 from every entry
# (presumably to get 0-based indices -- confirm against the ncl script).
iveg_m1 = np.where(ds.iveg.data == ifv, 18, ds.iveg.data) - 1
flat_m1 = iveg_m1.flatten()
from bgc_md2.models.cable_all.cableHelpers import cable_ds
from dask.distributed import Client
from pathlib import Path
import bgc_md2.models.cable_all.cableHelpers as cH
import zarr as zr
#if __name__ == '__main__':
from bgc_md2.sitespecificHelpers import get_client

# Obtain the dask client through the site-specific helper; the alternative via
# a scheduler file is kept below for reference.
client = get_client()
#client =Client(scheduler_file='/home/mm/scheduler.json')

# Open the example run output and write its variables below <run dir>/zarr.
example_run_dir = '/home/data/cable-data/example_runs/parallel_1901_2004_with_spinup/output/new4'
ds = cable_ds(example_run_dir)
zarr_dir_path = Path(example_run_dir) / 'zarr'
cH.write_vars_as_zarr(ds, str(zarr_dir_path))

# +
# This code never finishes
# dir_name2= example_run_dir+'xarray_ds_zarr'
# ds.to_zarr(dir_name2) #serial
# -
# Fill values recorded in the attributes of the `syds` variables.
tk = "_FillValue"
ifv = syds.iveg.attrs[tk]
ffv = syds.Cplant.attrs[tk]

# Length of the first axis of Cplant.
it_max = syds.Cplant.shape[0]
# time_slice=slice(0,it_max)
# could be used to significantly shorten the reconstruction times
time_slice = slice(0, 100)

# sl = slice(0,128)
sl = slice(None)  # for the full grid
vcsp = cP.zarr_valid_landpoint_patch_combis_slice_path(cable_out_path, sl)
zp = cP.zarr_path(cable_out_path)
bs = 128
ds = cH.cable_ds(cable_out_path)


def f(name):
    """Pass variable ``name`` of ``ds`` to ``cH.cache`` and return its result.

    The variable's data is wrapped with ``dask.array.asarray``; the zarr
    directory ``zp`` and the batch size ``bs`` are read from module scope, and
    ``rm`` is always ``False``.
    """
    lazy_values = dask.array.asarray(ds[name].data)
    return cH.cache(
        zarr_dir_path=zp,
        name=name,
        arr=lazy_values,
        rm=False,
        batch_size=bs,
    )


# The three pool arrays, requested in the same order as before.
Clitter, Cplant, Csoil = (
    f(pool_name) for pool_name in ("Clitter", "Cplant", "Csoil")
)