def test_create_dt_fpath_subdir_dt():
    """Date patterns in ``subdirs`` are expanded into the path."""
    expected = "/example/2000/01/file20000101name.nc"
    result = create_dt_fpath(
        datetime(2000, 1, 1),
        "/example",
        "file%Y%m%dname.nc",
        subdirs=["%Y", "%m"],
    )
    assert result == expected
def test_create_dt_fpath_fname_dt():
    """Literal ``subdirs`` pass through; date patterns in fname expand."""
    expected = "/example/sub1/sub2/file20000101name.nc"
    result = create_dt_fpath(
        datetime(2000, 1, 1),
        "/example",
        "file%Y%m%dname.nc",
        subdirs=["sub1", "sub2"],
    )
    assert result == expected
def save_gribs_from_grib(
        input_grib, output_path, product_name,
        filename_templ="{product}_OPER_0001_AN_%Y%m%d_%H%M.grb"):
    """
    Takes monthly grib files as downloaded by the function above and
    saves each time step in a separate file.

    Parameters
    ----------
    input_grib : string
        Filepath of the downloaded .grb file
    output_path : string
        Where to save the resulting grib files
    product_name : string
        Name of the ECMWF model (for filename generation)
    filename_templ : string, optional
        Template for naming each separated grb file
    """
    # Output files are organized as <output_path>/<year>/<day-of-year>/
    localsubdirs = ['%Y', '%j']
    grib_in = pygrib.open(input_grib)
    grib_in.seek(0)
    for grb in grib_in:
        filedate = datetime(grb['year'], grb['month'], grb['day'],
                            grb['hour'])
        # Only {product} is substituted here; the strftime patterns in the
        # template are expanded by create_dt_fpath from `filedate`.
        template = filename_templ.format(product=product_name)
        filepath = create_dt_fpath(filedate,
                                   root=output_path,
                                   fname=template,
                                   subdirs=localsubdirs)
        # exist_ok avoids the check-then-create race of the previous
        # os.path.exists() guard.
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        # Append mode: several grib messages for the same timestamp are
        # collected into one output file.
        with open(filepath, 'ab') as grb_out:
            grb_out.write(grb.tostring())
    grib_in.close()
def save_gribs_from_grib(
    input_grib,
    output_path,
    product_name,
    filename_templ="{product}_AN_%Y%m%d_%H%M.grb",
    keep_original=True,
):
    """
    Split the downloaded grib file into daily files and add to folder
    structure necessary for reshuffling.

    Parameters
    ----------
    input_grib : str
        Filepath of the downloaded .grb file
    output_path : str
        Where to save the resulting grib files
    product_name : str
        Name of the ECMWF model (only for filename generation)
    filename_templ : str, optional (default: product_AN_date_time)
        Template for naming each separated grb file
    keep_original : bool, optional (default: True)
        If False, the downloaded input file is deleted after splitting.
    """
    # Output files are organized as <output_path>/<year>/<day-of-year>/
    localsubdirs = ["%Y", "%j"]
    grib_in = pygrib.open(input_grib)
    grib_in.seek(0)
    for grb in grib_in:
        filedate = datetime(grb["year"], grb["month"], grb["day"],
                            grb["hour"])
        # Only {product} is substituted here; the strftime patterns are
        # expanded by create_dt_fpath from `filedate`.
        template = filename_templ.format(product=product_name)
        filepath = create_dt_fpath(filedate,
                                   root=output_path,
                                   fname=template,
                                   subdirs=localsubdirs)
        # exist_ok avoids the check-then-create race of the previous
        # os.path.exists() guard.
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        # Append mode: several grib messages for the same timestamp are
        # collected into one output file.
        with open(filepath, "ab") as grb_out:
            grb_out.write(grb.tostring())
    grib_in.close()
    if not keep_original:
        os.remove(input_grib)
def save_ncs_from_nc(input_nc, output_path, product_name,
                     filename_templ='{product}_{gridsize}_%Y%m%d_%H%M.nc'):
    """
    Takes monthly netcdf files as downloaded by the function above and
    saves each time step in a separate file.

    Parameters
    ----------
    input_nc : string
        Filepath of the downloaded .nc file
    output_path : string
        Where to save the resulting netcdf files
    product_name : string
        Name of the ECMWF model (for filename generation)
    filename_templ : string, optional
        Template for naming each separated nc file
    """
    # Output files are organized as <output_path>/<year>/<day-of-year>/
    localsubdirs = ['%Y', '%j']
    nc_in = xr.open_dataset(input_nc, mask_and_scale=True)
    # Grid spacing taken from the first lat/lon step, rounded to 3
    # decimals; assumes a regular grid — TODO confirm for all products.
    latdiff = np.abs(np.round(np.ediff1d(nc_in.latitude.values), 3))[0]
    londiff = np.abs(np.round(np.ediff1d(nc_in.longitude.values), 3))[0]
    gridsize = '%s_%s' % (str(latdiff), str(londiff))
    filename_templ = filename_templ.format(product=product_name,
                                           gridsize=gridsize)
    for time in nc_in.time.values:
        subset = nc_in.sel(time=time)
        timestamp = pd.Timestamp(time).to_pydatetime()
        filepath = create_dt_fpath(timestamp,
                                   root=output_path,
                                   fname=filename_templ,
                                   subdirs=localsubdirs)
        # exist_ok avoids the check-then-create race of the previous
        # os.path.exists() guard.
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        subset.to_netcdf(filepath)
    nc_in.close()
def save_ncs_from_nc(input_nc, output_path, product_name,
                     filename_templ='{product}_AN_%Y%m%d_%H%M.nc'):
    """
    Split the downloaded netcdf file into daily files and add to folder
    structure necessary for reshuffling.

    Parameters
    ----------
    input_nc : str
        Filepath of the downloaded .nc file
    output_path : str
        Where to save the resulting netcdf files
    product_name : str
        Name of the ECMWF model (only for filename generation)
    filename_templ : str, optional (default: product_AN_date_time)
        Template for naming each separated nc file
    """
    # Output files are organized as <output_path>/<year>/<day-of-year>/
    localsubdirs = ['%Y', '%j']
    nc_in = xr.open_dataset(input_nc, mask_and_scale=True)
    # Only {product} is substituted here; the strftime patterns are
    # expanded by create_dt_fpath from each timestamp.
    filename_templ = filename_templ.format(product=product_name)
    for time in nc_in.time.values:
        subset = nc_in.sel(time=time)
        timestamp = pd.Timestamp(time).to_pydatetime()
        filepath = create_dt_fpath(timestamp,
                                   root=output_path,
                                   fname=filename_templ,
                                   subdirs=localsubdirs)
        # exist_ok avoids the check-then-create race of the previous
        # os.path.exists() guard.
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        # same compression for all variables
        var_encode = {'zlib': True, 'complevel': 6}
        subset.to_netcdf(
            filepath,
            encoding={var: var_encode for var in subset.variables})
    nc_in.close()
def save_ncs_from_nc(
    input_nc,
    output_path,
    product_name,
    filename_templ="{product}_AN_%Y%m%d_%H%M.nc",
    grid=None,
    keep_original=True,
    remap_method="bil",
):
    """
    Split the downloaded netcdf file into daily files and add to folder
    structure necessary for reshuffling.

    Parameters
    ----------
    input_nc : str
        Filepath of the downloaded .nc file
    output_path : str
        Where to save the resulting netcdf files
    product_name : str
        Name of the ECMWF model (only for filename generation)
    filename_templ : str, optional (default: product_AN_date_time)
        Template for naming each separated nc file
    grid : dict, optional (default: None)
        CDO grid description (key/value pairs written to a grid.txt
        file); if given, each time slice is remapped to this grid.
        Requires cdo to be installed.
    keep_original : bool, optional (default: True)
        If False, the downloaded input file is deleted after splitting.
    remap_method : str, optional (default: 'bil')
        CDO remapping method suffix, e.g. 'bil' uses cdo genbil/remap.

    Raises
    ------
    CdoNotFoundError
        If `grid` is given but the cdo bindings are not available.
    """
    # Output files are organized as <output_path>/<year>/<day-of-year>/
    localsubdirs = ["%Y", "%j"]
    nc_in = xr.open_dataset(input_nc, mask_and_scale=True)
    # Only {product} is substituted here; the strftime patterns are
    # expanded by create_dt_fpath from each timestamp.
    filename_templ = filename_templ.format(product=product_name)
    if grid is not None:
        if not cdo_available:
            raise CdoNotFoundError()
        cdo = Cdo()
        gridpath = os.path.join(output_path, "grid.txt")
        weightspath = os.path.join(output_path, "remap_weights.nc")
        if not os.path.exists(gridpath):
            # Write the grid description once; reused for every slice.
            with open(gridpath, "w") as f:
                for k, v in grid.items():
                    f.write(f"{k} = {v}\n")
    for time in nc_in.time.values:
        subset = nc_in.sel(time=time)
        timestamp = pd.Timestamp(time).to_pydatetime()
        filepath = create_dt_fpath(
            timestamp,
            root=output_path,
            fname=filename_templ,
            subdirs=localsubdirs,
        )
        # exist_ok avoids the check-then-create race of the previous
        # os.path.exists() guard.
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        if grid is not None:
            if not os.path.exists(weightspath):
                # Generate the remap weights once from the first slice;
                # presumably all slices share the same source grid —
                # TODO confirm.
                getattr(cdo, "gen" + remap_method)(gridpath,
                                                   input=subset,
                                                   output=weightspath)
            subset = cdo.remap(
                ",".join([gridpath, weightspath]),
                input=subset,
                returnXDataset=True,
            )
        # same compression for all variables
        var_encode = {"zlib": True, "complevel": 6}
        subset.to_netcdf(
            filepath,
            encoding={var: var_encode for var in subset.variables})
    nc_in.close()
    if not keep_original:
        os.remove(input_nc)
    if grid is not None:
        cdo.cleanTempDir()