def combine(self, members, output_file, dimension=None, start_index=None, stop_index=None, stride=None):
    """
    Combine many files into a single file on disk with NCO's ``ncrcat``.

    The output is requested as NetCDF4 with level 3 compression and the
    ``history`` global attribute is left untouched.

    :param members: file paths, or objects exposing a ``path`` attribute.
        Only the first element is inspected to decide which of the two it is.
    :param output_file: path of the combined file to write.
    :param dimension: optional dimension name for a ``-d`` hyperslab.  When it
        is ``None`` no ``-d`` option is passed and ``ncrcat`` uses its own
        default behaviour.
    :param start_index: first index of the hyperslab (``0`` when omitted).
    :param stop_index: last index of the hyperslab (left empty when omitted).
    :param stride: stride of the hyperslab (``1`` when omitted).
    :raises ImportError: if the ``Nco`` wrapper cannot be created.
    """
    try:
        nco = Nco()
    except Exception:
        # This is not necessarily an import error (could be wrong PATH).
        # Only ordinary errors are translated: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as a missing
        # dependency.
        raise ImportError("NCO not found. The NCO python bindings are required to use 'Collection.combine'.")

    if len(members) > 0 and hasattr(members[0], 'path'):
        # Member objects were passed in, we only need the paths
        members = [m.path for m in members]

    options = [
        '-4',       # NetCDF4
        '-L', '3',  # Level 3 compression
        '-h',       # Don't append to the history global attribute
    ]
    if dimension is not None:
        if start_index is None:
            start_index = 0
        if stop_index is None:
            stop_index = ''
        if stride is None:
            stride = 1
        options += ['-d', '{0},{1},{2},{3}'.format(dimension, start_index, stop_index, stride)]

    nco.ncrcat(input=members, output=output_file, options=options)
def test_use_list_options(foo_nc):
    """Run ncrcat on ``foo_nc`` with its options supplied as one flat list."""
    attribute_edits = (
        'units,time,o,c,days since 1999-01-01',
        'long_name,time,o,c,time',
        'calendar,time,o,c,noleap',
    )
    runner = Nco(debug=True)
    # Each edit becomes its own '-a <edit>' pair, in the order listed above.
    options = []
    for edit in attribute_edits:
        options += ['-a', edit]
    runner.ncrcat(input=foo_nc, output='out.nc', options=options)
def netCDF_merge(self, directory):
    """
    Merge the downloaded ERA-Interim netCDF files of each type into one file.

    For each of the patterns ``erai_sa_*.nc``, ``erai_sf_*.nc`` and
    ``erai_pl_*.nc`` in ``directory``:

    -- the matching files are collected and sorted by name
    -- they are concatenated with ``ncrcat`` into
       ``erai_<type>_all_<first>_<last>.nc`` inside ``directory``, where
       ``<first>`` / ``<last>`` are the 8-character slices cut from the names
       of the first and last input file
    -- the input files are deleted afterwards

    A file type without any matching input is skipped.  Files that already
    carry the ``erai_<type>_all_`` prefix are not used as input, so running
    the merge again does not re-merge and delete an earlier result.

    Args:
        directory: directory holding the downloaded files; the merged files
                   are written to the same directory.
    """
    # set up nco operator
    nco = Nco()

    for kind in ('sa', 'sf', 'pl'):
        ncfile_in = path.join(directory, 'erai_' + kind + '_*.nc')
        merged_prefix = 'erai_' + kind + '_all_'

        # get the sorted file list, leaving out previously merged output
        files_list = sorted(
            fl for fl in glob.glob(ncfile_in)
            if not path.basename(fl).startswith(merged_prefix)
        )
        if not files_list:
            print('No files found for: ' + ncfile_in)
            continue

        # set up the name of merged file; it is placed in `directory` itself
        # (slicing the glob pattern used to leave a stray 'e' path component)
        merged_file = path.join(
            directory,
            merged_prefix + files_list[0][-23:-15] + '_' +
            files_list[-1][-11:-3] + '.nc')

        # combined files into merged files
        nco.ncrcat(input=files_list, output=merged_file, append=True)

        print('The Merged File below is saved:')
        print(merged_file)

        # clear up the data
        for fl in files_list:
            remove(fl)
def ocean_atlas_merge_time(output_dir):
    """
    Concatenate the ``ocean_atlas_??.nc`` files of ``output_dir`` with ncrcat.

    The inputs are handed over as strings in sorted order and the result is
    written to ``ocean_atlas.nc`` in the parent directory of ``output_dir``.
    The ``-A`` option is passed to ncrcat.
    """
    inputs = sorted(str(candidate) for candidate in output_dir.glob('ocean_atlas_??.nc'))
    destination = output_dir.parent / 'ocean_atlas.nc'
    Nco().ncrcat(input=inputs, output=str(destination), options=['-A'])
def dailyAve():
    """
    Build per-day-of-year TRMM 3B42 files with ncra, then combine them.

    For each of 365 day offsets from 1 January of ``startY`` the daily files
    of the years ``startY`` .. ``endY - 1`` are passed to ``ncra`` unless the
    output for that day already exists.  The per-day outputs are then
    concatenated with ``ncrcat`` and reduced with ``ncwa``.  Relies on the
    module-level names ``startY``, ``endY`` and ``os``.
    """
    from nco import Nco
    import datetime

    nco = Nco()
    for offset in range(365):
        day = datetime.date(startY, 1, 1) + datetime.timedelta(offset)
        day_of_year = day.strftime('%j')
        print("Averaging TRMM 3B42 for day " + day_of_year + "...")

        # space-separated list with one daily file per year
        yearly_names = []
        for year in range(startY, endY):
            yearly_names.append(
                "3B42_daily." + str(year) + "." + day.strftime('%m') + "." +
                day.strftime('%d') + ".7.nc")
        ifile = ' '.join(yearly_names)
        ofile = "3B42_aver." + day_of_year + ".nc"

        # keep an already computed day as it is
        if os.path.isfile(ofile):
            continue
        nco.ncra(input=ifile, output=ofile)

    nco.ncrcat(input="3B42_aver.*.nc", output="3B42_cat.nc", options="-d time,1,365")
    nco.ncwa(input="3B42_cat.nc", output="3B42_MAP.nc", options='-N -a time')
    return None
def dailyAve():
    """
    Run ncra per day of year over the TRMM 3B42 daily files, then merge.

    Walks 365 consecutive days starting at 1 January of ``startY``.  For each
    day the daily files of every year in ``range(startY, endY)`` are given to
    ``ncra`` (skipped when the day's output file is already present).  The
    resulting files are concatenated with ``ncrcat`` and the concatenation is
    passed to ``ncwa``.  ``startY``, ``endY`` and ``os`` come from module scope.
    """
    from nco import Nco
    import datetime

    def _inputs_for(day):
        # One daily file name per year, joined with blanks.
        return ' '.join(
            "3B42_daily." + str(year) + "." + day.strftime('%m') + "." +
            day.strftime('%d') + ".7.nc"
            for year in range(startY, endY))

    nco = Nco()
    for d in range(365):
        dp = datetime.date(startY, 1, 1) + datetime.timedelta(d)
        label = dp.strftime('%j')
        print("Averaging TRMM 3B42 for day " + label + "...")
        ifile = _inputs_for(dp)
        ofile = "3B42_aver." + label + ".nc"
        already_done = os.path.isfile(ofile)
        if not already_done:
            nco.ncra(input=ifile, output=ofile)

    nco.ncrcat(input="3B42_aver.*.nc", output="3B42_cat.nc", options="-d time,1,365")
    nco.ncwa(input="3B42_cat.nc", output="3B42_MAP.nc", options='-N -a time')
    return None
def download_E39_obs(start_date, end_date, buoy, fjord, variable):
    """
    Download E39 observations from https://thredds.met.no/ in netcdf format.

    One file per month start between ``start_date`` and ``end_date`` is
    downloaded into the current working directory.  The monthly files are
    concatenated with ``ncrcat`` into
    ``<first YYYYMM>_<last YYYYMM>_E39_<buoy>_<fjord>_<variable>.nc`` and are
    removed afterwards, also when a download or the concatenation fails.
    An output file that already exists is deleted before the download starts.

    An empty date range fails when the first month is looked up, before
    anything is downloaded.
    """
    nco = Nco()
    date_list = pd.date_range(start=start_date, end=end_date, freq='MS')

    # format the whole index once instead of once per loop iteration
    years = date_list.strftime('%Y')
    months = date_list.strftime('%m')
    suffix = '_E39_' + buoy + '_' + fjord + '_' + variable + '.nc'
    outfile = years[0] + months[0] + '_' + years[-1] + months[-1] + suffix

    if os.path.exists(outfile):
        os.remove(outfile)
        print(outfile, 'already exists, so it will be deleted and create a new....')
    else:
        print("....")

    base_url = 'https://thredds.met.no/thredds/fileServer/obs/buoy-svv-e39/'
    infile = []
    try:
        # download one temp file per month
        for year, month in zip(years, months):
            name = year + month + suffix
            # registered before the download so that a partially written
            # file is cleaned up as well
            infile.append(name)
            urlretrieve(base_url + year + '/' + month + '/' + name, name)

        print(infile)
        # merge temp files
        nco.ncrcat(input=infile, output=outfile)
    finally:
        # remove temp files
        for name in infile:
            if os.path.exists(name):
                os.remove(name)
def combine(self, members, output_file, dimension=None, start_index=None, stop_index=None, stride=None):
    """
    Combine many files into a single file on disk with NCO's ``ncrcat``.

    ``ncrcat`` is called with ``-4`` (NetCDF4), ``-L 3`` (level 3
    compression) and ``-h`` (do not append to the history global attribute).

    :param members: file paths, or objects with a ``path`` attribute; the
        decision is made by looking at the first element only.
    :param output_file: path of the file to create.
    :param dimension: name of the dimension to hyperslab via ``-d``.  No
        ``-d`` option is added when this is ``None``.
    :param start_index: hyperslab start, ``0`` if not given.
    :param stop_index: hyperslab stop, empty if not given.
    :param stride: hyperslab stride, ``1`` if not given.
    :raises ImportError: if creating the ``Nco`` wrapper fails.
    """
    try:
        nco = Nco()
    except Exception:
        # Ordinary failures are reported as a missing dependency.
        # KeyboardInterrupt / SystemExit are deliberately not caught so they
        # are not disguised as an ImportError.
        raise ImportError("NCO not found. The NCO python bindings are required to use 'Collection.combine'.")

    if len(members) > 0 and hasattr(members[0], 'path'):
        # Member objects were passed in, we only need the paths
        members = [m.path for m in members]

    options = ['-4']          # NetCDF4
    options += ['-L', '3']    # Level 3 compression
    options += ['-h']         # Don't append to the history global attribute

    if dimension is not None:
        start_index = 0 if start_index is None else start_index
        stop_index = '' if stop_index is None else stop_index
        stride = 1 if stride is None else stride
        options += ['-d', '{0},{1},{2},{3}'.format(dimension, start_index, stop_index, stride)]

    nco.ncrcat(input=members, output=output_file, options=options)
def xgeo_multifile_load(nc_wildcard, nc_dir=None):
    """
    Read data from multiple (e.g. hourly xgeo data) netcdf files.

    The files matching the wildcard are concatenated with ``ncrcat`` and the
    file it returns is opened as a ``netCDF4.Dataset``.

    :param nc_wildcard: common section of the file names, e.g. mf_files*.nc
    :param nc_dir: optional directory that is prepended to ``nc_wildcard``;
        when omitted the wildcard is used as given.
    :return: ``netCDF4.Dataset`` opened on the concatenated data
    """
    # Test the argument, not the builtin `dir` (which is always truthy and
    # made the call fail for nc_dir=None).
    if nc_dir:
        nc_path = os.path.join(nc_dir, nc_wildcard)
    else:
        nc_path = nc_wildcard

    nco = Nco()
    nc_temp = nco.ncrcat(input=nc_path)
    nc = netCDF4.Dataset(nc_temp)
    # TODO: add function to restrict dates and times
    return nc
def goesr_nc_concat(fn_list, fn_concat=None, read_only=True, global_attrs=None, debug=False):
    """ Returns a NetCDF object representing a concatenation of multiple files using NCO bindings.

    Args:
        fn_list (list): List of files.
            e.g.: ['OR_SEIS-L1b-MPSL_G16_20032991650000_20032991650290_20151252148107.nc',
                   'OR_SEIS-L1b-MPSL_G16_20032991650300_20032991650590_20151252148107.nc']

    Keyword Args:
        fn_concat (string): Optionally save new NetCDF file permanently to this path (relative is ok).
            e.g.: '/tmp/test_goesr_nc_concat.nc'

        read_only (True): Access mode used to open the concatenated file.
            True: returned NC object is read only ('r').
            False: returned NC object is writable ('r+').

        global_attrs (list): If a list is supplied, the global attributes of each
            concatenated file are appended to it, one entry per file in 'fn_list'
            order. This is a temporary step toward figuring out what to do with
            time varying global attributes.

        debug (True): True: print debug on STDOUT. False: no STDOUT.

    Returns:
        NetCDF object reference to aggregated result.
        None if 'fn_list' is empty or an exception occurs (the exception and its
        traceback are printed on STDOUT).

    Examples (also, see main() below):
        1) Get MPS-LO:
        >>> import glob, numpy as np, goesr_nc_concat
        >>> fn_list = sorted( glob.glob(
        ...     'OR_SEIS-L1b-MPSL_G??_??????????????_??????????????_??????????????.nc') )
        >>> nc_all = goesr_nc_concat.goesr_nc_concat( fn_list, debug=True )

        2) Print Global Attributes:
        >>> print( 'Global Attributes:' )
        >>> nc_all_dict = nc_all.__dict__
        >>> for attr in nc_all_dict: print( '\t%s: %s' % (attr, nc_all_dict[ attr ]) )

        3) Save concatenation to file system permanently:
        >>> fn_concat = '/tmp/test_goesr_nc_concat.nc'
        >>> nc_all = goesr_nc_concat.goesr_nc_concat( fn_list, fn_concat=fn_concat, debug=True )

        4) Use data:
        >>> print( 'Mean Diff-e-flux: %f, %s' %
        ...     ( np.mean( nc_all.variables['DiffElectronFluxes'] ),
        ...       nc_all.variables['DiffElectronFluxes'].units ) )

    Authors: R. Redmon (NOAA/NCEI),

    """

    # TODO: Handle time varying global attributes. As sidecar dictionary? As new variables in returned object?
    # TODO: Replace STDOUT debug with logger.

    # Imported before the try block: the exception handler below needs
    # `traceback` no matter how early the failure happens.
    import shutil
    import traceback

    # CONFIG
    my_name = 'goesr_nc_concat'

    try:
        file_rw_access = 'r' if read_only else 'r+'  # Read Only or Writable?
        if 0 == len(fn_list):
            return None

        # Third-party imports stay lazy so that a missing package is reported
        # through the handler below like any other failure.
        from netCDF4 import Dataset as NCDataset
        from nco import Nco

        # Concatenate to a Temporary File
        nco = Nco()
        fn_tmp = nco.ncrcat(input=fn_list, options='--create_ram')
        if debug:
            print(my_name + ': Aggregated %d files to temporary file: %s' % (len(fn_list), fn_tmp))

        # Save Temporary File Permanently (User Choice)
        if fn_concat is not None:
            shutil.copy2(fn_tmp, fn_concat)
            if debug:
                print(my_name + ': Saved aggregated file permanently as: %s' % fn_concat)
            fn_tmp = fn_concat

        # Open Concatenate as netcdf4-python::Dataset
        nc_all = NCDataset(fn_tmp, file_rw_access)

        # Keep Global Attributes as Ordered List (User Choice)
        if global_attrs is not None:
            for fn in fn_list:
                ds = NCDataset(fn)
                try:
                    global_attrs.append(ds.__dict__)
                finally:
                    # do not leave one open handle per input file behind
                    ds.close()

        # Return NC Object Reference
        return nc_all

    except Exception as e:
        print(my_name + ': EXCEPTION: %s' % str(e))
        # traceback of the exception being handled (not the caller's stack)
        print(traceback.format_exc())
        return None
def test_use_list_inputs(foo_nc, bar_nc):
    """Run ncrcat with both input files supplied together as one list."""
    Nco(debug=True).ncrcat(input=[foo_nc, bar_nc], output="out.nc")
def combinelon(prefix, inputdir, fill_value='1e20', daily=False, year=None):
    """
    Concatenate the per-longitude ``*.psims.nc`` tiles of one latitude band.

    Steps, all carried out through NCO operators working in place:

    1. collect the tile files for ``prefix`` in ``inputdir``
    2. create fully masked placeholder tiles for longitude indices in
       ``fulllonidx`` that have no file
    3. make ``lon`` the leading dimension of every tile and concatenate them
    4. add ``scen_new`` and ``irr`` dimensions and rebuild every data variable
       on ``[lat, scen_new, irr, lon, time]``
    5. replace the old ``scen`` variable and cut the result to ``lon0..lon1``

    Relies on names defined outside this function: ``fulllonidx``, ``tlond``,
    ``lond``, ``outputdir``, ``irrflag``, ``irr1st``, ``lon0``, ``lon1`` as
    well as ``sep``, ``listdir``, ``basename``, ``filter``, ``nc``,
    ``copyfile``, ``Nco`` and the array helpers.

    :param prefix: leading part of the tile file names
    :param inputdir: directory containing the tiles (placeholders are written here too)
    :param fill_value: value written as ``_FillValue`` of the rebuilt variables
    :param daily: when true, work on the ``<...>.<year>.psims.nc`` files
    :param year: year used in the file names when ``daily`` is true
    """
    # NOTE(review): `filter` is called as (names, pattern), so it is presumably
    # fnmatch.filter rather than the builtin -- confirm against the imports.
    if daily:
        files = [
            inputdir + sep + f
            for f in filter(listdir(inputdir), '%s*.%d.psims.nc' % (prefix, year))
        ]
    else:
        files = [
            inputdir + sep + f
            for f in filter(listdir(inputdir), '%s*.psims.nc' % prefix)
        ]

    # tile latitude and longitude indices, taken from the '_'-separated
    # fields of the file names (raises IndexError when no file matched)
    tlatidx = basename(files[0]).split('_')[1]
    lonidx = [int(basename(f).split('_')[2][:4]) for f in files]

    # get file information from the first tile
    with nc(files[0]) as f:
        # data variables = everything that is not one of these named variables
        vars = setdiff1d(f.variables.keys(),
                         ['time', 'scen', 'irr', 'lat', 'lon'])
        nscen = f.variables['scen'].size
        # leading dimension of the first data variable
        ldim = f.variables[vars[0]].dimensions[0]
        vunits = [0] * len(vars)
        vlnames = [0] * len(vars)
        for i in range(len(vars)):
            var = f.variables[vars[i]]
            # '' marks a missing attribute; tested for truthiness further down
            vunits[i] = var.units if 'units' in var.ncattrs() else ''
            vlnames[i] = var.long_name if 'long_name' in var.ncattrs() else ''

    # fill longitude gaps: copy the first tile, overwrite its longitudes and
    # mask every data variable completely
    for idx in setdiff1d(fulllonidx, lonidx):
        if daily:
            lonfile = inputdir + sep + '%s_%s_%04d.%d.psims.nc' % (
                prefix, tlatidx, idx, year)
        else:
            lonfile = inputdir + sep + '%s_%s_%04d.psims.nc' % (prefix,
                                                                tlatidx, idx)
        copyfile(files[0], lonfile)
        lons = arange(-180 + tlond * (idx - 1) + lond / 2.,
                      -180 + tlond * idx, lond)
        with nc(lonfile, 'a') as f:
            lonvar = f.variables['lon']
            lonvar[:] = lons
            for i in range(len(vars)):
                var = f.variables[vars[i]]
                var[:] = masked_array(zeros(var.shape), mask=ones(var.shape))
        files.append(lonfile)

    # output file
    if daily:
        outputfile = outputdir + sep + '%s_%s.%d.psims.nc' % (prefix, tlatidx,
                                                              year)
    else:
        outputfile = outputdir + sep + '%s_%s.psims.nc' % (prefix, tlatidx)

    nco = Nco()

    # make longitude lead dimension (each tile is rewritten in place)
    for i in range(len(files)):
        nco.ncpdq(input=files[i],
                  output=files[i],
                  options='-O -h -a lon,%s' % str(ldim))

    # concatenate all files, in fulllonidx order; the names are joined into a
    # single space-separated string
    if daily:
        inputfiles = ' '.join([
            inputdir + sep + '%s_%s_%04d.%d.psims.nc' %
            (prefix, tlatidx, idx, year) for idx in fulllonidx
        ])
    else:
        inputfiles = ' '.join([
            inputdir + sep + '%s_%s_%04d.psims.nc' % (prefix, tlatidx, idx)
            for idx in fulllonidx
        ])
    nco.ncrcat(input=inputfiles, output=outputfile, options='-h')

    # make latitude lead dimension
    nco.ncpdq(input=outputfile, output=outputfile, options='-O -h -a lat,lon')

    # add new scenario dimension
    # NOTE(review): `/` must yield an int for range() and '%d' below, so this
    # looks written for Python 2 integer division -- confirm before porting.
    nscennew = nscen / (1 + irrflag)
    scen_range = ','.join([str(s) for s in range(1, nscennew + 1)])
    scenopt = '-O -h -s \'defdim("scen_new",%d)\' -s "scen_new[scen_new]={%s}"' % (
        nscennew, scen_range)
    nco.ncap2(input=outputfile, output=outputfile, options=scenopt)
    nco.ncatted(input=outputfile,
                output=outputfile,
                options='-O -h -a units,scen_new,c,c,"no"')
    nco.ncatted(input=outputfile,
                output=outputfile,
                options='-O -h -a long_name,scen_new,c,c,"scenario"')

    # add irr dimension of size 1 + irrflag; its long_name lists the labels
    # in the order selected by irr1st
    nirr = 1 + irrflag
    irr_range = ','.join([str(i) for i in range(1, nirr + 1)])
    irr_lname = ['ir', 'rf'][:1 + irrflag] if irr1st else ['rf', 'ir'][:1 + irrflag]
    irropt = '-O -h -s \'defdim("irr",%d)\' -s "irr[irr]={%s}"' % (nirr,
                                                                   irr_range)
    nco.ncap2(input=outputfile, output=outputfile, options=irropt)
    nco.ncatted(input=outputfile,
                output=outputfile,
                options='-O -h -a units,irr,c,c,"mapping"')
    nco.ncatted(input=outputfile,
                output=outputfile,
                options='-O -h -a long_name,irr,c,c,"%s"' % ','.join(irr_lname))

    # refactor variables: build '<var>_new' on the new dimensions, copy the
    # data over, drop the old variable and give the new one the old name
    for i in range(len(vars)):
        var = str(vars[i])
        # create new variable
        opt = '-O -h -s "\'%s_new\'[lat,scen_new,irr,lon,time]=0.0f"' % var
        nco.ncap2(input=outputfile, output=outputfile, options=opt)
        # set attributes (units / long_name only when the source had them)
        opt = '-O -h -a _FillValue,%s_new,c,f,%s' % (var, fill_value)
        nco.ncatted(input=outputfile, output=outputfile, options=opt)
        if vunits[i]:
            opt = '-O -h -a units,%s_new,c,c,"%s"' % (var, str(vunits[i]))
            nco.ncatted(input=outputfile, output=outputfile, options=opt)
        if vlnames[i]:
            opt = '-O -h -a long_name,%s_new,c,c,"%s"' % (var, str(vlnames[i]))
            nco.ncatted(input=outputfile, output=outputfile, options=opt)
        # set value
        opt = '-O -h -s "\'%s_new\'(:,:,:,:,:)=\'%s\'"' % (var, var)
        nco.ncap2(input=outputfile, output=outputfile, options=opt)
        # remove old variable
        opt = '-O -h -x -v %s' % var
        nco.ncks(input=outputfile, output=outputfile, options=opt)
        # rename new variable
        opt = '-O -h -v %s_new,%s' % (var, var)
        nco.ncrename(input=outputfile, output=outputfile, options=opt)

    # remove old scenario dimension and let scen_new take over its name
    nco.ncks(input=outputfile, output=outputfile, options='-O -h -x -v scen')
    nco.ncrename(input=outputfile,
                 output=outputfile,
                 options='-O -h -v scen_new,scen')

    # limit spatial extent to sim grid
    nco.ncks(input=outputfile,
             output=outputfile,
             options='-O -h -d lon,%f,%f' % (lon0, lon1))
latidx = int((tlatdelta * (tlatidx - 1) + tslatdelta * (slatidx - 1) + latdelta * i) / latdelta) filename = 'daily_%04d.%04d.psims.nc' % (latidx, year) files_to_remove.append(filename) pool.apply_async(combine_daily, [latidx, lonidx, year, filename]) pool.close() pool.join() # Concatenate along latitude and permute dimensions of final file print "Running concat" part_directory = os.path.join('parts', '%04d' % tslatidx) finalfile = os.path.join(part_directory, 'output_%04d_%04d.psims.nc' % (tslatidx, tslonidx)) files_to_copy.append(finalfile) mkdir_p(part_directory) nco.ncrcat(input='output_*.psims.nc', output=finalfile, options='-h') nco.ncpdq(input=finalfile, output=finalfile, options='-O -h -a lon,lat') nco.ncpdq(input=finalfile, output=finalfile, options='-O -h -a time,lon') # Concatenate along latitude and permute dimensions of final file if daily and daily_combine: print "Running concat daily" for year in range(ref_year, ref_year + num_years): part_directory = os.path.join('parts', '%04d' % tslatidx) finalfile_daily = os.path.join( part_directory, 'daily_%04d_%04d.%04d.psims.nc' % (tslatidx, tslonidx, year)) files_to_copy.append(finalfile_daily) mkdir_p(part_directory) nco.ncrcat(input='daily_*.%04d.psims.nc' % year, output=finalfile_daily,
# Compute monthly averages per run with ncra and concatenate them per run
# with ncrcat.  `clm_runs`, `which_hist` and `month_year_iter` are defined
# outside this fragment.
nco = Nco()
# Scratch directory for all intermediate and final files; created below the
# directory named by the CSCRATCH environment variable.
# NOTE(review): os.getenv returns None when CSCRATCH is unset, which
# os.path.join rejects -- confirm the variable is always set.
tmpdir = tempfile.mkdtemp(prefix='monthly_mean_tmp',
                          dir=os.path.join(os.getenv('CSCRATCH')))
print "placing monthly averages in {}".format(tmpdir)
m0, y0 = (11, 15)  # start month, start year
m1, y1 = (1, 16)  # end month, end year
for this_run in clm_runs:
    # monthly average files produced for this run, in iteration order
    out_files = []
    for this_year, this_month in month_year_iter(m0, y0, m1, y1):
        # select this run's files for the current year-month
        pat = "*{}*{:04d}-{:02d}*.nc".format(which_hist, this_year, this_month)
        this_run.gather_filenames(glob_pat=pat)
        # months without any matching file are skipped
        if len(this_run.all_files) != 0:
            out_fname = os.path.join(
                tmpdir,
                '{}_{:04d}-{:02d}_{}avg.nc'.format(this_run.CASE, this_year,
                                                   this_month, which_hist))
            print 'calculating {}'.format(out_fname)
            nco.ncra(input=this_run.all_files, output=out_fname)
            out_files.append(out_fname)
    print "concatenating {} monthly averages".format(this_run.CASE)
    # one concatenated file per run, written next to the monthly files
    nco.ncrcat(input=out_files,
               output=os.path.join(
                   tmpdir, '{}_{}avg.nc'.format(this_run.CASE, which_hist)))
def combinelat(prefix, inputdir, outputfile, daily, year): try: if daily: files = [ inputdir + sep + f for f in filter(listdir(inputdir), '%s*.%d.psims.nc' % (prefix, year)) ] else: files = [ inputdir + sep + f for f in filter(listdir(inputdir), '%s*.psims.nc' % prefix) ] # tile latitude and longitude indices latidx = [int(basename(f).split('_')[1][:4]) for f in files] # variables with nc(files[0]) as f: vars = setdiff1d(f.variables.keys(), ['time', 'scen', 'irr', 'lat', 'lon']) # fill longitude gaps for idx in setdiff1d(fulllatidx, latidx): if daily: latfile = inputdir + sep + '%s_%04d.%d.psims.nc' % (prefix, idx, year) else: latfile = inputdir + sep + '%s_%04d.psims.nc' % (prefix, idx) copyfile(files[0], latfile) lats = arange(90 - tlatd * (idx - 1) - latd / 2., 90 - tlatd * idx, -latd) with nc(latfile, 'a') as f: latvar = f.variables['lat'] latvar[:] = lats for i in range(len(vars)): var = f.variables[vars[i]] var[:] = masked_array(zeros(var.shape), mask=ones(var.shape)) files.append(latfile) # output file nco = Nco() # concatenate all files if daily: inputfiles = ' '.join([ inputdir + sep + '%s_%04d.%d.psims.nc' % (prefix, idx, year) for idx in fulllatidx ]) else: inputfiles = ' '.join([ inputdir + sep + '%s_%04d.psims.nc' % (prefix, idx) for idx in fulllatidx ]) nco.ncrcat(input=inputfiles, output=outputfile, options='-h') # make time lead dimension if timelead: nco.ncpdq(input=outputfile, output=outputfile, options='-O -h -a time,lat') # limit spatial extent to sim grid nco.ncks(input=outputfile, output=outputfile, options='-O -h -d lat,%f,%f' % (lat1, lat0)) # compress system('nccopy -d9 -k4 %s %s' % (outputfile, outputfile + '.tmp')) rename(outputfile + '.tmp', outputfile) except: print "[%s] %s" % (os.path.basename(__file__), traceback.format_exc())
# get the files we want to work with for this run cur_df = files_df[ (files_df.variable == variable) & (files_df.model == model) & (files_df.scenario == scenario) ].copy() cur_df = cur_df.apply( pd.to_numeric, errors='ignore' ) # dtypes updating cur_df = cur_df.sort_values(['end_year']) # sort it based on the end_year cur_files = cur_df['fn'].tolist() if len(cur_files) > 0: begin, end = cur_df.iloc[0]['begin_year'], cur_df.iloc[-1]['end_year'] begin = str(begin)+'01' end = str(end)+'12' dirname, basename = os.path.split(cur_files[0]) basename = basename.split( '.' )[0] basename = '_'.join(basename.split( '_' )[0:-2]) output_filename = os.path.join( output_path, model, scenario, variable, basename+'_'+begin+'-'+end+'.nc' ) try: dirname, basename = os.path.split( output_filename ) if not os.path.exists( dirname ): os.makedirs( dirname ) except: pass if len( cur_files ) > 1: _ = nco.ncrcat( input=cur_files, output=output_filename ) # os.system( 'ncrcat ' + ' '.join([ fn for fn in files ]) + ' -o ' + output_filename elif len(cur_files) == 1: fn, = cur_files basename = os.path.basename( fn ) _ = shutil.copy( fn, os.path.join( output_path, model, scenario, variable, basename ) ) else: print( '___ --> NODATA for: {} {} {}'.format(model, scenario, variable) )