def bpch_to_netCDF(folder='none', filename='ctm.nc',
                   bpch_file_list=None, remake=False, verbose=True):
    """
    Convert GEOS-Chem ctm.bpch output file(s) to a single NetCDF file.

    Parameters
    ----------
    folder (str): working directory for data files ('none' is resolved
        by get_folder — presumably to the current directory; confirm)
    filename (str): name of the NetCDF file to create
    bpch_file_list (list or None): explicit bpch filenames (relative to
        folder); if None, the folder is searched for '*.bpch*' and then
        '*trac_avg*' files
    remake (bool): overwrite an existing NetCDF file
    verbose (bool): print progress to screen

    Returns
    -------
    (None) saves a NetCDF file to disk

    Raises
    ------
    IOError: if no bpch files are found, or a listed file is missing
    """
    # Check if file already exists and warn about remaking
    from bpch2netCDF import get_folder
    folder = get_folder(folder)
    output_file = os.path.join(folder, filename)
    # If the netCDF file already exists don't overwrite it without remake=True.
    if not remake:
        if os.path.exists(output_file):
            logging.warning(output_file + ' already exists. Not recreating.')
            return
    # By default look inside the folder for any files
    if bpch_file_list is None:  # 'is None', not '== None'
        bpch_files = glob.glob(os.path.join(folder, '*.bpch*'))
        if len(bpch_files) == 0:
            # Fall back to GEOS-Chem's alternative output naming scheme
            bpch_files = glob.glob(os.path.join(folder, '*trac_avg*'))
            if len(bpch_files) == 0:
                logging.error("No bpch files found in " + folder)
                raise IOError(folder + " contains no bpch files.")
    # Confirm the specified bpch files are there.
    else:
        file_list = []
        for bpch_file in bpch_file_list:
            full_path = os.path.join(folder, bpch_file)
            if not os.path.exists(full_path):
                logging.error(full_path + " could not be found")
                raise IOError("Full path could not be found")
            file_list.append(full_path)
        bpch_files = file_list
    # Open the bpch files
    logging.debug("The following bpch files were found:")
    logging.debug(str(bpch_files))
    if verbose:
        print("Creating a netCDF file. This can take some time...")
    bpch_data = datasets.load(bpch_files)
    # Save the netCDF file
    datasets.save(bpch_data, output_file)
    logging.info("A netCDF file has been created with the name {ctm}".format(
        ctm=output_file))
    return
def load_emission_data(emission_fields, return_data=False):
    """
    Load the data field(s) corresponding to one or more given emission
    fields (base emission, scale factors and/or masks).

    Parameters
    ----------
    emission_fields : (sequence of) emission field object(s)
        load data fields for these emission fields (:class:`EmissionBase`,
        :class:`EmissionScale` or :class:`EmissionMask` objects).
    return_data : bool
        if True, it will return the loaded data fields, in addition to
        assign it to the corresponding emission fields (:prop:`datafield`).

    Notes
    -----
    The metadata and emission fields (:prop:`var_name` and :prop:`filename`)
    is used to load the data fields.
    """
    # TODO: make it work for any dataset backend
    # NOTE: deliberately not yet implemented — everything below is dead
    # code until this raise is removed.
    raise exceptions.NotYetImplementedError()

    # Accept a single emission field as well as a sequence of them
    if isinstance(emission_fields,
                  (EmissionBase, EmissionScale, EmissionMask)):
        emission_fields = [emission_fields]

    data_fields = []
    # TODO: load data fields at once for emission fields with the same filename
    for efield in emission_fields:
        # BUGFIX: the original compared efield.var_name to itself (always
        # True, so the constraint matched every cube). Compare the CUBE's
        # var_name instead, and bind efield as a default argument so the
        # lambda is not late-bound to the loop variable.
        constraint = datasets.Constraint(
            cube_func=lambda cube, efield=efield:
                cube.var_name == efield.var_name
        )
        if efield.filename is not None:
            dfield = datasets.load(efield.filename, constraint)
        else:
            dfield = None
        efield.datafield = dfield
        data_fields.append(dfield)

    if return_data:
        return data_fields
def convert_to_netCDF(folder='none', filename='ctm.nc'):
    """
    Convert GEOS-Chem ctm.bpch output in a folder to a NetCDF file,
    saved via iris.

    Parameters
    ----------
    folder (str): working directory ('none' is resolved by get_folder)
    filename (str): name of the NetCDF file to create

    Returns
    -------
    (None) saves a NetCDF file to disk
    """
    from bpch2netCDF import get_folder
    folder = get_folder(folder)
    # Get ctm.bpch ( inc. if named *trac_avg* )
    import glob
    bpch_files = glob.glob(folder + '/*ctm.bpch*')
    if len(bpch_files) == 0:
        bpch_files = glob.glob(folder + '/*trac_avg*')
    # Open the bpch files.
    # Newer PyGChem exposes 'datasets'; older versions 'datafields' —
    # only an ImportError should trigger the fallback (was a bare except).
    try:
        from pygchem import datasets
    except ImportError:
        import pygchem.datafields as datasets
    data = datasets.load(bpch_files)
    # Save the netCDF file
    import os
    output_file = os.path.join(folder, filename)
    import iris
    iris.fileformats.netcdf.save(data, output_file)
    return
def bpch_to_netCDF(folder=None, filename='ctm.nc', bpch_file_list=None,
                   remake=False, filetype="*ctm.bpch*", verbose=False,
                   **kwargs):
    """
    Converts GEOS-Chem ctm.bpch output file(s) to NetCDF

    Parameters
    ----------
    folder (str): working directory for data files
    filename (str): name to give created NetCDF
    bpch_file_list (list): list of files to convert
    remake (bool): overwrite existing NetCDF file
    filetype (str): string with wildcards to match filenames
        ( e.g. *ctm.bpch*, *ts*bpch* )
    verbose (bool): print (minor) logging to screen

    Returns
    -------
    (None) saves a NetCDF file to disk

    Raises
    ------
    IOError: if no bpch files match filetype, or a listed file is missing
    """
    # Check if file already exists and warn about remaking
    from bpch2netCDF import get_folder
    folder = get_folder(folder)
    output_file = os.path.join(folder, filename)
    # If the netCDF file already exists don't overwrite it without remake=True.
    if not remake:
        if os.path.exists(output_file):
            logging.warning(output_file + ' already exists. Not recreating.')
            return
    # Look for files if file list is not provided.
    if bpch_file_list is None:  # simpler than isinstance(x, type(None))
        logging.debug("Searching for the following bpch filetype: {filetype}"
                      .format(filetype=filetype))
        bpch_files = glob.glob(folder + '/' + filetype)
        if len(bpch_files) == 0:
            logging.error("No bpch files found in " + folder)
            raise IOError(folder + " contains no bpch files.")
    # use the specified files.
    else:
        file_list = []
        for bpch_file in bpch_file_list:
            full_path = folder + '/' + bpch_file
            if not os.path.exists(full_path):
                logging.error(full_path + " could not be found")
                raise IOError("Full path could not be found")
            file_list.append(full_path)
        bpch_files = file_list
    # Open the bpch files
    logging.debug("The following bpch files were found:")
    logging.debug(str(bpch_files))
    if verbose:
        print("Creating a netCDF file. This can take some time...")
    bpch_data = datasets.load(bpch_files)
    # Save the netCDF file
    datasets.save(bpch_data, output_file)
    logging.info("A netCDF file has been created with the name {ctm}"
                 .format(ctm=output_file))
    return
def bpch_to_netCDF(folder=None, filename='ctm.nc', bpch_file_list=None,
                   remake=False, filetype="*ctm.bpch*",
                   check4_trac_avg_if_no_ctm_bpch=True, backend='PyGChem',
                   verbose=False, **kwargs):
    """
    Converts GEOS-Chem ctm.bpch output file(s) to NetCDF

    Parameters
    ----------
    folder (str): working directory for data files
    filename (str): name to give created NetCDF
    bpch_file_list (list): list of files to convert
    remake (bool): overwrite existing NetCDF file
    filetype (str): string with wildcards to match filenames
        ( e.g. *ctm.bpch*, trac_avg.*, or *ts*bpch* )
    check4_trac_avg_if_no_ctm_bpch (bool): fall back to '*trac_avg*' when
        no files match filetype
    backend (str): converter to use ('PyGChem', 'xbpch', 'iris' or 'PNC')
    verbose (bool): print (minor) logging to screen

    Returns
    -------
    (None) saves a NetCDF file to disk
    """
    import os
    # Check if file already exists and warn about remaking.
    # BUGFIX: use an absolute import when there is no package context —
    # a relative import fails exactly when __package__ is None (the
    # original imported relatively in both branches).
    if __package__ is None:
        from bpch2netCDF import get_folder
    else:
        from .bpch2netCDF import get_folder
    folder = get_folder(folder)
    output_file = os.path.join(folder, filename)
    # If the netCDF file already exists don't overwrite it without remake=True.
    if not remake:
        if os.path.exists(output_file):
            logging.warning(output_file + ' already exists. Not recreating.')
            return
    # Look for files if file list is not provided.
    if bpch_file_list is None:
        logging.debug("Searching for the following bpch filetype: {filetype}"
                      .format(filetype=filetype))
        bpch_files = glob.glob(folder + '/' + filetype)
        # Also check if directory contains *trac_avg* files, if no ctm.bpch
        if (len(bpch_files) == 0) and check4_trac_avg_if_no_ctm_bpch:
            filetype = '*trac_avg*'
            logging.info('WARNING! - now trying filetype={}'.format(filetype))
            bpch_files = glob.glob(folder + '/' + filetype)
        # Raise error if no files matching filetype
        if len(bpch_files) == 0:
            logging.error("No bpch files ({}) found in {}".format(filetype,
                                                                  folder))
            raise IOError("{} contains no bpch files.".format(folder))
    # Use the specified files.
    else:
        file_list = []
        for bpch_file in bpch_file_list:
            full_path = folder + '/' + bpch_file
            if not os.path.exists(full_path):
                logging.error(full_path + " could not be found")
                raise IOError("Full path could not be found")
            file_list.append(full_path)
        bpch_files = file_list
    # Open the bpch files
    logging.debug("The following bpch files were found (n={}):"
                  .format(len(bpch_files)))
    logging.debug(str(bpch_files))
    if verbose:
        print(("Creating a netCDF from {} file(s).".format(len(bpch_files)) +
               " This can take some time..."))
    if backend == 'PyGChem':
        # Load all the files into memory
        bpch_data = datasets.load(bpch_files)
        # Save the netCDF file
        datasets.save(bpch_data, output_file)
    elif backend == 'xbpch':
        import xbpch
        # Load all the files into memory (as xarray dataset object)
        ds = xbpch.open_mfbpchdataset(bpch_files)
        # save through xarray dataset object
        ds.to_netcdf(output_file, unlimited_dims={'time_counter': True})
    elif backend == 'iris':
        # iris.fileformats.netcdf.save(data, output_file)
        print('WARNING NetCDF made by iris is non CF-compliant')
    elif backend == 'PNC':
        # (the unused 'import PseudoNetCDF as pnc' was removed —
        # bpch_to_netCDF_via_PNC handles the PNC dependency)
        import xarray as xr
        if len(bpch_files) == 1:
            bpch_to_netCDF_via_PNC(filename=filename,
                                   output_file=output_file,
                                   bpch_file=bpch_files[0])
        # Individually convert bpch files if more than one file
        if len(bpch_files) > 1:
            # NOTE(review): the TEMP output names are relative — confirm
            # bpch_to_netCDF_via_PNC writes them into 'folder'.
            for n_bpch_file, bpch_file in enumerate(bpch_files):
                bpch_to_netCDF_via_PNC(
                    filename=filename,
                    output_file='TEMP_{}_'.format(n_bpch_file) + filename,
                    bpch_file=bpch_file)
            # - Combine the NetCDF files with xarray.
            # BUGFIX: use os.path.join — the original concatenated
            # folder+'TEMP_*_'+filename without a path separator.
            TEMP_ncfiles = glob.glob(
                os.path.join(folder, 'TEMP_*_' + filename))
            # Open files with xarray
            ds_l = [xr.open_dataset(i) for i in TEMP_ncfiles]
            # Concatenate along the time dimension
            ds = xr.concat(ds_l, dim='time')
            # Now save the combined file
            ds.to_netcdf(os.path.join(folder, filename),
                         unlimited_dims={'time_counter': True})
            # Remove the temporary files
            for TEMP_ncfile in TEMP_ncfiles:
                os.remove(TEMP_ncfile)
    logging.info("A netCDF file has been created with the name {ctm}"
                 .format(ctm=output_file))
    return
# NOTE(review): this is a fragment of a larger reader function — the
# enclosing 'if', and the names 'vars', 'wd', 'debug', 'r_cubes' and the
# trailing body are defined outside the visible source. Python-2 print
# statements are preserved as-is.
else:
    # Get default settings for reader
    if isinstance(vars, type(None)):
        # Default to extracting the surface-average ozone field
        vars = ['IJ_AVG_S__O3']
    # Ensure wd has a trailing '/' (the original comment said "leading")
    if wd[-1] != '/':
        wd += '/'
    # Get files in dir ( more than one? )
    fns = sorted(glob.glob(wd + '*ctm*'))
    if debug:
        print fns
    # Load files into Iris Cube
    cubes = datasets.load(fns, vars)
    # If no data extracted, print our variables
    try:
        # Touch each cube's data to force a load / detect empty extraction
        [i[0].data for i in cubes]
    except:
        print datasets.load(fns[0])
        print 'WARNING: no vars found for >{}<'.format(','.join(vars))
        sys.exit(0)
    # Temporary fix for back compatibility:
    # Just extract as numpy
    if not r_cubes:
        # Extract data
def open_netCDF(folder='none', filename='ctm.nc', bpch_file_names=None,
                remake=False):
    """
    Opens the netCDF file from a GEOS-Chem run.

    Converts all .bpch files in a folder to a netCDF file if required.
    The default folder is the current folder. The default output filename
    is ctm.nc.

    Parameters
    ----------
    folder (str): run directory ('none' uses the current working directory)
    filename (str): name of the netCDF file to open (or create)
    bpch_file_names (list or None): explicit bpch filenames to convert;
        if None, folder is searched for '*.bpch' files
    remake (bool): delete any existing netCDF file so it is recreated

    Returns
    -------
    netCDF4 Dataset object

    Raises
    ------
    IOError: if bpch files are missing or the conversion fails
    """
    import logging
    import os
    if folder == 'none':
        folder = os.getcwd()
    # Strip trailing "/"
    elif folder[-1] == "/":
        folder = folder[:-1]
    netCDF_filename = os.path.join(folder, filename)
    if remake:
        try:
            os.remove(netCDF_filename)
            logging.info("netCDF file deleted for remake")
        except OSError:  # file absent / not removable (was a bare except)
            logging.info("Tried remaking but could not remove old file")
    logging.info('Opening netCDF file: ' + netCDF_filename)
    from netCDF4 import Dataset
    # Try to open a netCDF file
    try:
        # Hope for success
        netCDF_data = Dataset(netCDF_filename)
        logging.info("netCDF file opened successfuly")
    except Exception:  # was a bare except (also caught KeyboardInterrupt)
        # If no netCDF file loaded, try making one from bpch
        logging.debug('No netCDF file found. Attempting to create one.')
        # Confirm aux files exist (tracerinfo.dat)
        # If they are not then the netCDF variable names can be bugged.
        # Code to do this check goes here...
        # Newer PyGChem exposes 'datasets'; older versions 'datafields'
        try:
            from pygchem import datasets
        except ImportError:
            import pygchem.datafields as datasets
        import glob
        if bpch_file_names is None:  # 'is None', not '== None'
            bpch_files = glob.glob(folder + '/*.bpch')
        else:
            bpch_files = []
            for bpch_file in bpch_file_names:
                bpch_location = folder + '/' + bpch_file
                if not os.path.isfile(bpch_location):
                    raise IOError('{_file} not found'
                                  .format(_file=bpch_location))
                bpch_files.append(bpch_location)
        logging.debug('Found ' + str(len(bpch_files)) + ' bpch files.')
        if len(bpch_files) == 0:
            logging.error('No bpch files found.')
            raise IOError('Cannot find bpch files in {folder}'
                          .format(folder=folder))
        pygchem_data = datasets.load(bpch_files)
        # Convert the dataset to an iris cube and export as netCDF
        logging.debug('Creating a netCDF file from bpch.')
        print("Creating a netCDF file from bpch")
        # Save the dataset to disk
        datasets.save(pygchem_data, netCDF_filename)
        # Open the netCDF dataset.
        netCDF_data = Dataset(netCDF_filename)
        if netCDF_data is None:
            logging.error('Error creating netCDF file from bpch.')
            raise IOError('Error creating netCDF file from bpch.')
        # Confirm that the netCDF time dimension has one entry per bpch
        # file (each bpch file is assumed to hold a single timestep).
        # To-do: There is probably a cleaner way to do this.
        if not (len(bpch_files) == 1):
            if not (len(netCDF_data.variables['time']) == len(bpch_files)):
                logging.error('Incorrect amount of timesets from bpch files.'
                              'Could be due to incomplete bpch files')
                # Find any potential small files.
                # BUGFIX: the log message had no {filename} placeholder,
                # so the offending file was never named.
                for bpch_file in bpch_files:
                    if os.path.getsize(bpch_file) < 1000:
                        logging.info('{filename} looks rather small...'
                                     .format(filename=bpch_file))
#                raise IOError('Incorrect amount of timesets from bpch files.')
    return netCDF_data
def bpch_to_netCDF(folder=None, filename='ctm.nc', bpch_file_list=None,
                   remake=False, filetype="*ctm.bpch*",
                   check4_trac_avg_if_no_ctm_bpch=True, verbose=False,
                   **kwargs):
    """
    Converts GEOS-Chem ctm.bpch output file(s) to NetCDF

    Parameters
    ----------
    folder (str): working directory for data files
    filename (str): name to give created NetCDF
    bpch_file_list (list): list of files to convert
    remake (bool): overwrite existing NetCDF file
    filetype (str): string with wildcards to match filenames
        ( e.g. *ctm.bpch*, trac_avg.*, or *ts*bpch* )
    check4_trac_avg_if_no_ctm_bpch (bool): fall back to '*trac_avg*' when
        no files match filetype
    verbose (bool): print (minor) logging to screen

    Returns
    -------
    (None) saves a NetCDF file to disk
    """
    # Check if file already exists and warn about remaking.
    # BUGFIX: use an absolute import when there is no package context —
    # a relative import fails exactly when __package__ is None (the
    # original imported relatively in both branches).
    if __package__ is None:
        from bpch2netCDF import get_folder
    else:
        from .bpch2netCDF import get_folder
    folder = get_folder(folder)
    output_file = os.path.join(folder, filename)
    # If the netCDF file already exists don't overwrite it without remake=True.
    if not remake:
        if os.path.exists(output_file):
            logging.warning(output_file + ' already exists. Not recreating.')
            return
    # Look for files if file list is not provided.
    if bpch_file_list is None:  # simpler than isinstance(x, type(None))
        logging.debug("Searching for the following bpch filetype: {filetype}"
                      .format(filetype=filetype))
        bpch_files = glob.glob(folder + '/' + filetype)
        # Also check if directory contains *trac_avg* files, if no ctm.bpch
        if (len(bpch_files) == 0) and check4_trac_avg_if_no_ctm_bpch:
            filetype = '*trac_avg*'
            logging.info('WARNING! - now trying filetype={}'.format(filetype))
            bpch_files = glob.glob(folder + '/' + filetype)
        # Raise error if no files matching filetype
        if len(bpch_files) == 0:
            logging.error("No bpch files ({}) found in {}".format(filetype,
                                                                  folder))
            raise IOError("{} contains no bpch files.".format(folder))
    # Use the specified files.
    else:
        file_list = []
        for bpch_file in bpch_file_list:
            full_path = folder + '/' + bpch_file
            if not os.path.exists(full_path):
                logging.error(full_path + " could not be found")
                raise IOError("Full path could not be found")
            file_list.append(full_path)
        bpch_files = file_list
    # Open the bpch files
    logging.debug("The following bpch files were found (n={}):"
                  .format(len(bpch_files)))
    logging.debug(str(bpch_files))
    if verbose:
        print("Creating a netCDF from {} file(s).".format(len(bpch_files)) +
              " This can take some time...")
    bpch_data = datasets.load(bpch_files)
    # Save the netCDF file
    datasets.save(bpch_data, output_file)
    logging.info("A netCDF file has been created with the name {ctm}"
                 .format(ctm=output_file))
    return