def read(self, t=None, z=None, y=None, x=None, N=None, K=None, J=None,
         I=None, var=None, nonan=True, result='full', profile=False,
         dummy=False):
    """Reads dataset.

    PARAMETERS
        t, z, y, x (array like, optional) :
            Sets the time, height, latitude and longitude for which
            the data will be read.
        N, K, J, I (array like, optional) :
            Sets the temporal, vertical, meridional and zonal indices
            for which the data will be read.
        var (string, optional) :
            Indicates which variable of the grid will be read. If the
            parameter is a list of variables, then the data will be
            returned as a dictionary of arrays, keyed by variable
            name.
        nonan (boolean, optional) :
            If set to true (default) changes data values containing
            NaN to zero, preserving the mask.
        result (string, optional) :
            Determines whether all time, height, latitude, longitude
            and data will be returned ('full', default), if temporal,
            vertical, meridional and zonal indices are returned
            instead ('indices'), or if only variable data is returned
            ('var only').
        profile (boolean, optional) :
            Sets whether the status is sent to screen.
        dummy (boolean, optional) :
            If set to true, does not load data and returns the shape
            of the array that would have been returned.

    RETURNS
        t, z, y, x, dat (array like) :
            If 'result' is set to 'full', then all coordinates and
            data variables are returned.
        N, K, J, I, var (array like) :
            If 'result' is set to 'indices', then all indices and
            data variables are returned.
        dat (array like) :
            If 'result' is set to 'var only', then the data is
            returned.

    """
    global DEBUG
    t1 = time()

    # Checks input variables for consistency. Note that 'is (not)
    # None' is used instead of '== None', which would trigger
    # element-wise comparison for array-like arguments.
    if (t is not None) & (N is not None):
        raise ValueError('Both time and temporal index were provided.')
    if (z is not None) & (K is not None):
        raise ValueError('Both height and vertical index were '
                         'provided.')
    if (y is not None) & (J is not None):
        raise ValueError('Both latitude and meridional index were '
                         'provided.')
    if (x is not None) & (I is not None):
        raise ValueError('Both longitude and zonal index were '
                         'provided.')
    if var is None:
        var = self.params['var_list']

    # Checks for variable indices. Intersects desired input values
    # with dataset dimension data. In this dataset, since only surface
    # data is available, the vertical index is always zero.
    if t is not None:
        N = flatnonzero(in1d(self.variables['time'].data, t))
    elif N is None:
        N = arange(self.dimensions['n'])
    K = [0]
    if y is not None:
        J = flatnonzero(in1d(self.variables['latitude'].data, y))
    elif J is None:
        J = arange(self.dimensions['j'])
    if x is not None:
        I = flatnonzero(in1d(self.variables['longitude'].data, x))
    elif I is None:
        I = arange(self.dimensions['i'])

    # Sets the shape of the data array.
    shape = (len(N), 1, len(J), len(I))
    if dummy:
        return shape

    # Selects data according to indices.
    t = self.variables['time'].data[N]
    z = self.variables['height'].data
    y = self.variables['latitude'].data[J]
    x = self.variables['longitude'].data[I]
    xx, yy = meshgrid(x, y)
    II, JJ = meshgrid(I, J)

    # Resets variables and groups the requested grids per dataset, so
    # that each data file is opened only once per time step.
    Var = dict()
    Datasets = dict()
    for item in var:
        Var[item] = ma.zeros(shape)
        try:
            Datasets[self.params['var_tcid'][item][0]][1].append(
                self.params['var_tcid'][item][2]
            )
        except KeyError:
            Datasets[self.params['var_tcid'][item][0]] = [
                self.params['var_tcid'][item][1],
                [self.params['var_tcid'][item][2]]
            ]

    # Walks through every time index and loads data range from maps.
    for n, T in enumerate(t):
        t2 = time()
        if profile:
            s = '\rLoading data... %s ' % (profiler(shape[0], n + 1, 0,
                                                    t1, t2),)
            stdout.write(s)
            stdout.flush()
        # Reads NetCDF file for each dataset.
        for Dataset, (Datavar, Datagrid) in Datasets.items():
            params = dict(path=self.params['path'], dataset=Dataset,
                          datavar=Datavar,
                          **self.params['file_list'][N[n]])
            fname = self.create_filename(**params)
            data = self.read_file(fname)
            #
            for Grid in Datagrid:
                nvar = self.params['var_dict']['{0}_{1}'.format(
                    Dataset, Grid)]
                if (('lon_i' in self.params.keys()) &
                        ('lat_j' in self.params.keys())):
                    P = data.variables[Grid].data.T[
                        self.params['lat_j'], self.params['lon_i']
                    ][JJ, II]
                else:
                    P = data.variables[Grid].data.T[JJ, II]
                # Masks on the missing value of the variable being
                # loaded ('nvar'), not on the leftover loop variable
                # 'item' from the reset loop above.
                P[P >= self.variables[nvar].missing_value] = nan
                P = ma.masked_where(isnan(P), P)
                if nonan:
                    P.data[P.mask] = 0
                #
                Var[nvar][n, 0, :, :] += P[:, :]
            #
            self.close_file(data)

    # If the result dictionary contains only one item, returns only
    # the value of this item.
    if len(Var.keys()) == 1:
        Var = Var[Var.keys()[0]]

    if profile:
        stdout.write('\r\n')
        stdout.flush()

    if DEBUG:
        print 't: ', t
        print 'z: ', z
        print 'y: ', y
        print 'x: ', x
        print 'var: ', Var
        print 'N: ', N
        print 'K: ', K
        print 'J: ', J
        print 'I: ', I
        print 'shape: ', shape

    if result == 'full':
        return t, z, y, x, Var
    elif result == 'indices':
        return N, K, J, I, Var
    elif result == 'var only':
        return Var
    else:
        # Warns rather than raises, so that the 'var only' fallback
        # promised by the message is actually reached. Assumes
        # 'import warnings' at module level.
        warnings.warn("Result parameter set improperly to '%s', "
                      "assuming 'var only'." % (result,))
        return Var
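# Usage sketch (the instance name 'grid' and the variable name 'sst'
# are hypothetical; coordinate values must match grid points exactly,
# since 'read' intersects them with 'in1d'):
#
#     # Query the output shape without touching any NetCDF file.
#     shape = grid.read(var=['sst'], dummy=True)   # (n, 1, j, i)
#
#     # Read one variable on a coordinate subset; with a single
#     # variable and result='var only', a lone masked array returns.
#     sst = grid.read(y=[-10., 0., 10.], x=[180., 200., 220.],
#                     var=['sst'], result='var only')
#
#     # Read by index instead of by coordinate value.
#     t, z, y, x, dat = grid.read(N=[0, 1, 2], result='full')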
def read(self, var, x=None, y=None, radius=0., tlim=None, ylim=None,
         xlim=None, missions=None, sort=True, profile=True):
    """Reads dataset.

    PARAMETERS
        var (string) :
            Variable to be read from dataset. It also accepts special
            naming conventions in order to rename the original
            dataset variable and to load alternative variables in
            case of invalid data, according to the syntax
            '[new_var_name]:var[|other_var]'.
        x, y (array like, optional) :
            List of zonal and meridional point coordinates of
            interest.
        radius (float, optional) :
            Search radius in degrees.
        tlim, ylim, xlim (array like, optional) :
            The temporal, meridional and zonal limits (minimum,
            maximum) for which data will be read.
        missions (array like, optional) :
            List of missions to read data from. If omitted, defaults
            to the missions available at dataset class
            initialization.
        sort (boolean, optional) :
            If true, sorts the data record in ascending order of
            time, latitude and longitude.
        profile (boolean, optional) :
            Sets whether the status is sent to screen.

    RETURNS
        dat (record array) :
            Record time-series of 'time', 'latitude', 'longitude',
            selected variable and 'mission'.

    """
    t0 = time()
    # Checks input parameters.
    T = self.variables['time'].data
    if var.find(':') >= 0:
        # Checks special variable syntax.
        var_name, var = var.split(':')
    else:
        var_name = var
    if tlim is None:
        tlim = (T.min(), T.max())
    if (x is not None) | (y is not None):
        x, y = asarray(x), asarray(y)
        if x.size != y.size:
            raise ValueError('Zonal and meridional coordinate '
                             'dimensions do not match.')
        npoints = x.size
        radius2 = radius ** 2
    else:
        npoints = 0
        x = y = []
    #
    if ylim is None:
        ylim = (-90., 90.)
    if xlim is None:
        xlim = (0., 360.)
    else:
        # Makes sure longitude limits are between 0 and 360.
        xlim = list(lon360(asarray(xlim)))
    if missions is None:
        missions = self.params['missions']

    # First we have to select which files will be loaded, which
    # depends on the temporal limits given in 'tlim'.
    sel_time = flatnonzero((T >= floor(min(tlim))) &
                           (T <= ceil(max(tlim))))
    N = len(sel_time)

    # Second we walk through each of the selected times in the dataset
    # and load the corresponding file for the available missions.
    t1 = time()
    if profile:
        s = '\rLoading data...'
        stdout.write(s)
        stdout.flush()
    # Resets important variables.
    TIME, LAT, LON, VAR, MISSION = [array([])] * 5
    #
    for i, tm in enumerate(T[sel_time]):
        t2 = time()
        for (mission, dset, fname, cycle,
                orbit) in self.attributes['time_dataset'][tm]:
            # Skips missions not in the missions list.
            if mission not in missions:
                continue
            # Uncompresses gzipped file and opens NetCDF instance.
            data = self.read_file('%s/%s/%s' % (self.params['path'],
                                                mission, fname))
            # Reads variables from NetCDF file.
            raw_time = self.read_variable(data, 'time')
            raw_lat = self.read_variable(data, 'lat')
            raw_lon = self.read_variable(data, 'lon')
            raw_dat = self.read_variable(data, var)
            # Selects relevant data range according to limit
            # parameters.
            sel_from_time = ((raw_time >= min(tlim)) &
                             (raw_time <= max(tlim)))
            if (ylim is not None) | (xlim is not None):
                sel_from_limits = ones(data.dimensions['time'],
                                       dtype=bool)
            else:
                sel_from_limits = zeros(data.dimensions['time'],
                                        dtype=bool)
            if ylim is not None:
                sel_from_limits = (sel_from_limits &
                                   ((raw_lat >= min(ylim)) &
                                    (raw_lat <= max(ylim))))
            if xlim is not None:
                sel_from_limits = (sel_from_limits &
                                   ((raw_lon >= min(xlim)) &
                                    (raw_lon <= max(xlim))))
            # Selects relevant data according to points and search
            # radius.
            sel_from_radius = zeros(data.dimensions['time'],
                                    dtype=bool)
            for xx, yy in zip(x, y):
                distance2 = ((raw_lat - yy) ** 2 +
                             (raw_lon - lon360(xx)) ** 2)
                sel_from_radius = (sel_from_radius |
                                   (distance2 <= radius2))
            #
            sel_data = flatnonzero(sel_from_time &
                                   (sel_from_limits |
                                    sel_from_radius) &
                                   (~isnan(raw_dat)))
            _time = raw_time[sel_data]
            _lat = raw_lat[sel_data]
            _lon = raw_lon[sel_data]
            _dat = raw_dat[sel_data]
            #
            TIME = append(TIME, _time)
            LAT = append(LAT, _lat)
            LON = append(LON, _lon)
            VAR = append(VAR, _dat)
            MISSION = append(MISSION, [mission] * len(sel_data))
            #
            self.close_file(data)
        #
        # Profiling.
        if profile:
            s = '\rLoading data... %s ' % (profiler(N, i + 1, t0, t1,
                                                    t2),)
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()

    # Converts the data to a structured array.
    DAT = rec.fromarrays((TIME, LAT, LON, VAR, MISSION),
                         dtype=[('time', float64),
                                ('latitude', float64),
                                ('longitude', float64),
                                (var_name, float64),
                                ('mission', '|S3')])

    # Some data sorting?
    if sort:
        DAT.sort(order=('time', 'latitude', 'longitude'), axis=0)

    return DAT
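# Usage sketch (hypothetical instance 'track' and start time 'tstart';
# the renaming syntax stores the dataset variable 'sla' under the
# field name 'ssh' in the returned record array):
#
#     dat = track.read('ssh:sla', tlim=(tstart, tstart + 365.),
#                      ylim=(-30., 30.), xlim=(160., 200.))
#     print dat.dtype.names
#     # ('time', 'latitude', 'longitude', 'ssh', 'mission')
#
#     # Point search: all records within 1 degree of two locations.
#     dat = track.read('sla', x=[165.0, 190.5], y=[-8.0, 2.5],
#                      radius=1.)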
def read(self, t=None, z=None, y=None, x=None, N=None, K=None, J=None,
         I=None, var=None, nonan=True, result='full', profile=False,
         dummy=False):
    """Reads dataset.

    PARAMETERS
        t, z, y, x (array like, optional) :
            Sets the time, height, latitude and longitude for which
            the data will be read.
        N, K, J, I (array like, optional) :
            Sets the temporal, vertical, meridional and zonal indices
            for which the data will be read.
        var (string, optional) :
            Indicates which variable of the grid will be read. If the
            parameter is a list of variables, then the data will be
            returned as a dictionary of arrays, keyed by variable
            name.
        nonan (boolean, optional) :
            If set to true (default) changes data values containing
            NaN to zero, preserving the mask.
        result (string, optional) :
            Determines whether all time, height, latitude, longitude
            and data will be returned ('full', default), if temporal,
            vertical, meridional and zonal indices are returned
            instead ('indices'), or if only variable data is returned
            ('var only').
        profile (boolean, optional) :
            Sets whether the status is sent to screen.
        dummy (boolean, optional) :
            If set to true, does not load data and returns the shape
            of the array that would have been returned.

    RETURNS
        t, z, y, x, dat (array like) :
            If 'result' is set to 'full', then all coordinates and
            data variables are returned.
        N, K, J, I, var (array like) :
            If 'result' is set to 'indices', then all indices and
            data variables are returned.
        dat (array like) :
            If 'result' is set to 'var only', then the data is
            returned.

    """
    global DEBUG
    t1 = time()

    # Checks input variables for consistency. Note that 'is (not)
    # None' is used instead of '== None', which would trigger
    # element-wise comparison for array-like arguments.
    if (t is not None) & (N is not None):
        raise ValueError('Both time and temporal index were provided.')
    if (z is not None) & (K is not None):
        raise ValueError('Both height and vertical index were '
                         'provided.')
    if (y is not None) & (J is not None):
        raise ValueError('Both latitude and meridional index were '
                         'provided.')
    if (x is not None) & (I is not None):
        raise ValueError('Both longitude and zonal index were '
                         'provided.')
    if var is None:
        var = self.params['var_list']

    # Checks for variable indices. Intersects desired input values
    # with dataset dimension data. In this dataset, since only surface
    # data is available, the vertical index is always zero.
    if t is not None:
        N = flatnonzero(in1d(self.variables['time'].data, t))
    elif N is None:
        N = arange(self.dimensions['n'])
    K = [0]
    if y is not None:
        J = flatnonzero(in1d(self.variables['latitude'].data, y))
    elif J is None:
        J = arange(self.dimensions['j'])
    if x is not None:
        # Intersects with the longitude (not latitude) input.
        I = flatnonzero(in1d(self.variables['longitude'].data, x))
    elif I is None:
        I = arange(self.dimensions['i'])

    # Sets the shape of the data array.
    shape = (len(N), 1, len(J), len(I))
    if dummy:
        return shape

    # Selects data according to indices.
    t = self.variables['time'].data[N]
    z = self.variables['height'].data
    y = self.variables['latitude'].data[J]
    x = self.variables['longitude'].data[I]
    xx, yy = meshgrid(x, y)
    II, JJ = meshgrid(I, J)

    # Resets variables. If both wind stress components are requested,
    # they are merged into a single complex array 'tauxy', with 'taux'
    # as the real part and 'tauy' as the imaginary part.
    Var = dict()
    if ('taux' in var) & ('tauy' in var):
        tauxy = True
    else:
        tauxy = False
    for item in var:
        if (item == 'taux') & tauxy:
            Var['tauxy'] = ma.zeros(shape, dtype=complex)
        elif (item == 'tauy') & tauxy:
            continue
        else:
            Var[item] = ma.zeros(shape)

    # Walks through every time index and loads data range from maps.
    for n, T in enumerate(t):
        t2 = time()
        if profile:
            s = '\rLoading data... %s ' % (profiler(shape[0], n + 1, 0,
                                                    t1, t2),)
            stdout.write(s)
            stdout.flush()
        # Reads NetCDF file.
        data = netcdf('%s/%s' % (self.params['path'],
                                 self.params['file_list'][N[n]]), 'r')
        for item in var:
            if (('lon_i' in self.params.keys()) &
                    ('lat_j' in self.params.keys())):
                P = data.variables[item].data[
                    0, 0, self.params['lat_j'], self.params['lon_i']
                ][JJ, II]
            else:
                P = data.variables[item].data[0, 0, JJ, II]
            P[P <= self.variables[item].missing_value] = nan
            P = ma.masked_where(isnan(P), P)
            if nonan:
                P.data[P.mask] = 0
            #
            if (item == 'taux') & tauxy:
                Var['tauxy'][n, 0, :, :] += P[:, :]
            elif (item == 'tauy') & tauxy:
                Var['tauxy'][n, 0, :, :] += 1j * P[:, :]
            else:
                Var[item][n, 0, :, :] += P[:, :]
        data.close()

    # If the result dictionary contains only one item, returns only
    # the value of this item.
    if len(Var.keys()) == 1:
        Var = Var[Var.keys()[0]]

    if profile:
        stdout.write('\r\n')
        stdout.flush()

    if DEBUG:
        print 't: ', t
        print 'z: ', z
        print 'y: ', y
        print 'x: ', x
        print 'var: ', Var
        print 'N: ', N
        print 'K: ', K
        print 'J: ', J
        print 'I: ', I
        print 'shape: ', shape

    if result == 'full':
        return t, z, y, x, Var
    elif result == 'indices':
        return N, K, J, I, Var
    elif result == 'var only':
        return Var
    else:
        # Warns rather than raises, so that the 'var only' fallback
        # promised by the message is actually reached. Assumes
        # 'import warnings' at module level.
        warnings.warn("Result parameter set improperly to '%s', "
                      "assuming 'var only'." % (result,))
        return Var
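# Usage sketch (hypothetical instance 'winds'). When both wind stress
# components are requested, they come back packed in one complex
# masked array, so components and magnitude are one-liners:
#
#     tauxy = winds.read(var=['taux', 'tauy'], result='var only')
#     taux, tauy = tauxy.real, tauxy.imag
#     magnitude = ma.absolute(tauxy)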
def make_index(self, profile=True):
    """Builds time indices of the data files, per mission and for the
    whole dataset."""
    t0 = t1 = time()  # 't0' is required by the profiler calls below.
    if profile:
        s = '\rBuilding preliminary time array...'
        stdout.write(s)
        stdout.flush()
    time_mission = dict()
    time_dataset = dict()
    N = len(self.params['missions'])
    for i, mission in enumerate(self.params['missions']):
        t2 = time()
        tt1 = time()
        #
        mpath = '%s/%s' % (self.params['path'], mission)  # Mission path
        ylist = listdir(mpath)  # Year list in mission path
        Nyear = len(ylist)
        file_pattern = (r'%s_%s_%s_%s_%s_(\d*)_(\d*)_(\d*)_(\d*)_'
                        r'(\d*)_(\d*)\.nc\.gz') % (
            'GW', self.params['level'].upper(),
            self._labels[self.params['product']],
            self._labels[mission], self.params['delay'].upper())
        # Initializes the mission time dictionary.
        time_mission[mission] = dict(data=[], file=[])
        for j, yr in enumerate(ylist):
            tt2 = time()
            # Lists all the directories in year.
            dlist = listdir('%s/%s' % (mpath, yr))
            for dset in dlist:
                # Lists all the data files of the mission in a given
                # year and matches them with the file pattern.
                cur_path = '%s/%s/%s' % (mpath, yr, dset)
                flist = listdir(cur_path)
                flist.sort()
                flist, match = reglist(flist, file_pattern)
                # Converts data and product dates to matplotlib
                # format, i.e. days since 0001-01-01 UTC, and appends
                # them to the global mission and dataset time
                # dictionaries.
                for k, item in enumerate(match):
                    datetime_start = dates.datestr2num(
                        '%4s-%2s-%2s %2s:%2s:%2s' %
                        (item[0][0:4], item[0][4:6], item[0][6:8],
                         item[1][0:2], item[1][2:4], item[1][4:6])
                    )
                    datetime_end = dates.datestr2num(
                        '%4s-%2s-%2s %2s:%2s:%2s' %
                        (item[2][0:4], item[2][4:6], item[2][6:8],
                         item[3][0:2], item[3][2:4], item[3][4:6])
                    )
                    time_data = (datetime_start + datetime_end) / 2.
                    cycle = int(item[4])
                    orbit = int(item[5])
                    time_mission[mission]['data'].append(time_data)
                    #
                    fname = '%s/%s/%s' % (yr, dset, flist[k])
                    descriptor = (mission, dset, fname, cycle, orbit)
                    if time_data not in time_dataset.keys():
                        time_dataset[time_data] = [descriptor]
                    else:
                        time_dataset[time_data].append(descriptor)
                    #
                    time_mission[mission]['file'].append(fname)
            #
            # Profiling.
            if profile:
                s = ('\rBuilding preliminary time array for %s: %s '
                     % (self._missions[mission],
                        profiler(Nyear, j + 1, t0, tt1, tt2)))
                stdout.write(s)
                stdout.flush()
        #
        time_mission[mission]['data'] = array(
            time_mission[mission]['data']
        )
        time_mission[mission]['file'] = array(
            time_mission[mission]['file']
        )
        # Profiling.
        if profile:
            s = '\rBuilding preliminary time array... %s ' % (
                profiler(N, i + 1, t0, t1, t2),
            )
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()
    return time_mission, time_dataset
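# Consumption sketch for the two indices returned above (hypothetical
# instance 'gw'; file names depend on the archive layout):
#
#     time_mission, time_dataset = gw.make_index()
#     # Each map time maps to one descriptor per matching file:
#     # (mission, dataset_dir, relative_file_name, cycle, orbit).
#     for tm in sorted(time_dataset.keys())[:3]:
#         for mission, dset, fname, cycle, orbit in time_dataset[tm]:
#             print tm, mission, cycle, orbit, fname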
def __init__(self, delay='dt', missions=None, zone='global',
             product='sla', variable='vxxc', path=None, profile=True):
    """
    Initializes the dataset class for reading along-track gridded
    sequential data from the SSALTO/DUACS products distributed by
    Aviso.

    PARAMETERS
        delay (text, optional) :
            Selects whether delayed-time products ('dt', default) or
            near-real-time products are read.
        missions (text, array like, optional) :
            Determines the satellite missions to be selected (i.e.
            e1, e2, tp, tpn, g2, j1, j1n, j2, en, enn, c2, al). If
            set to None, all available missions are used.
        zone (text, optional) :
            Geographic coverage of the selected products:
                global -- Global geographic coverage;
                med -- Mediterranean;
                blacksea -- Black Sea;
                moz -- Mozambique;
                arctic -- Arctic;
                europe -- Europe.
        product (text, optional) :
            Variable to be read (sla -- sea level anomaly or
            adt -- absolute dynamic topography).
        variable (text, optional) :
            Either 'vfec' for validated, filtered, sub-sampled and
            LWE-corrected; or 'vxxc' for validated, non-filtered,
            non-sub-sampled and LWE-corrected data.
        path (text, optional) :
            Path to the dataset files.
        profile (boolean, optional) :
            Sets whether the status is sent to screen.

    """
    t0 = time()
    # Checks all the input parameters for consistency.
    if delay not in self._delays.keys():
        raise ValueError('Invalid delay parameter "%s".' % (delay))
    if missions is None:
        missions = self._missions.keys()
    elif type(missions) == str:
        if missions in self._missions.keys():
            missions = [missions]
        else:
            raise ValueError('Invalid mission "%s".' % (missions))
    elif type(missions) == list:
        for item in missions:
            if item not in self._missions.keys():
                raise ValueError('Invalid mission "%s".' % (item))
    else:
        raise ValueError('Invalid mission "%s".' % (missions))
    if zone not in self._zones.keys():
        raise ValueError('Invalid geographic zone "%s".' % (zone))
    if product not in self._products.keys():
        raise ValueError('Invalid product "%s".' % (product))
    if variable not in self._filterings.keys():
        raise ValueError('Invalid variable "%s".' % (variable))

    # Initializes parameters and attributes in class variables.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict(
        delay=delay,
        missions=missions,
        zone=zone,
        product=product,
        variable=variable
    )
    # Creates a universally unique identifier (UUID) for this
    # instance.
    self.params['uuid'] = str(uuid())

    # Sets path and missing value parameters.
    if path is None:
        path = '%s/%s/%s/%s/%s' % ('/academia/data/raw/aviso',
                                   self._delays[delay], 'along-track',
                                   self._filterings[variable], product)
    self.params['path'] = path
    self.params['missing_value'] = -9999.

    # Determines the temporal range of the whole dataset per mission.
    t1 = time()
    if profile:
        s = '\rBuilding preliminary time array...'
        stdout.write(s)
        stdout.flush()
    time_mission = dict()
    time_dataset = dict()
    N = len(self.params['missions'])
    for i, mission in enumerate(self.params['missions']):
        t2 = time()
        #
        mpath = '%s/%s' % (path, mission)  # Mission path
        ylist = listdir(mpath)  # Year list in mission path
        file_pattern = r'%s_%s_%s_%s_%s_(\d*)_(\d*)\.nc\.gz' % (
            delay, zone, mission, product, variable)
        time_mission[mission] = dict(data=[], product=[], file=[])
        for yr in ylist:
            # Lists all the data files of the mission in a given year
            # and matches them with the file pattern.
            flist = listdir('%s/%s' % (mpath, yr))
            flist.sort()
            flist, match = reglist(flist, file_pattern)
            # Converts data and product dates to matplotlib format,
            # i.e. days since 0001-01-01 UTC, and appends them to the
            # global mission and dataset time dictionaries.
            for j, item in enumerate(match):
                time_data = dates.datestr2num(
                    '%4s-%2s-%2s 12:00' % (item[0][:4], item[0][4:6],
                                           item[0][6:]))
                time_mission[mission]['data'].append(time_data)
                fname = '%s/%s' % (yr, flist[j])
                descriptor = (mission, fname)
                if time_data not in time_dataset.keys():
                    time_dataset[time_data] = [descriptor]
                else:
                    time_dataset[time_data].append(descriptor)
                #
                time_product = dates.datestr2num(
                    '%4s-%2s-%2s 12:00' % (item[1][:4], item[1][4:6],
                                           item[1][6:]))
                time_mission[mission]['product'].append(time_product)
                #
                time_mission[mission]['file'].append(fname)
        #
        time_mission[mission]['data'] = array(
            time_mission[mission]['data']
        )
        time_mission[mission]['product'] = array(
            time_mission[mission]['product']
        )
        time_mission[mission]['file'] = array(
            time_mission[mission]['file']
        )
        # Profiling.
        if profile:
            s = '\rBuilding preliminary time array... %s ' % (
                profiler(N, i + 1, t0, t1, t2),
            )
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()
    #
    self.attributes['time_mission'] = time_mission
    self.attributes['time_dataset'] = time_dataset

    # Updates dimensions, coordinates and creates the time variable.
    self.dimensions['n'] = len(time_dataset)
    self.coordinates['n'] = 'time'
    self.variables['time'] = atlantis.data.variable(
        canonical_units='days since 0001-01-01 UTC',
        data=array(sorted(time_dataset.keys())),
        height=atlantis.data.get_standard_variable('height',
                                                   data=[0.]),
        latitude=atlantis.data.get_standard_variable('latitude'),
        longitude=atlantis.data.get_standard_variable('longitude'),
    )
    return None
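# Instantiation sketch (the class name 'Grid' and the local mirror
# path are hypothetical; the default path above is site-specific):
#
#     dataset = Grid(delay='dt', missions=['j2', 'al'], zone='global',
#                    product='sla', variable='vxxc',
#                    path='/data/aviso/dt/along-track/vxxc/sla')
#     print dataset.dimensions['n']  # number of distinct time steps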
def read(self, x=None, y=None, radius=0., tlim=None, ylim=None,
         xlim=None, missions=None, sort=True, profile=True):
    """Reads dataset.

    PARAMETERS
        x, y (array like, optional) :
            List of zonal and meridional point coordinates of
            interest.
        radius (float, optional) :
            Search radius in degrees.
        tlim, ylim, xlim (array like, optional) :
            The temporal, meridional and zonal limits (minimum,
            maximum) for which data will be read.
        missions (array like, optional) :
            List of missions to read data from. If omitted, defaults
            to the missions available at dataset class
            initialization.
        sort (boolean, optional) :
            If true, sorts the data record in ascending order of
            time, latitude and longitude.
        profile (boolean, optional) :
            Sets whether the status is sent to screen.

    RETURNS
        dat (record array) :
            Record time-series of 'time', 'latitude', 'longitude',
            selected variable and 'mission'.

    """
    t0 = time()
    # Checks input parameters.
    T = self.variables['time'].data
    if tlim is None:
        tlim = (T.min(), T.max())
    if (x is not None) | (y is not None):
        x, y = asarray(x), asarray(y)
        if x.size != y.size:
            raise ValueError('Zonal and meridional coordinate '
                             'dimensions do not match.')
        npoints = x.size
        radius2 = radius ** 2
    else:
        npoints = 0
        x = y = []
    #
    if ylim is None:
        ylim = (-90., 90.)
    if xlim is None:
        xlim = (0., 360.)
    else:
        # Makes sure longitude limits are between 0 and 360.
        xlim = list(lon360(asarray(xlim)))
    if missions is None:
        missions = self.params['missions']

    # Aviso uses time in days since 1950-01-01 00:00:00 UTC, therefore
    # we have to calculate the initial time in matplotlib's format. We
    # also have to determine the proper variable using the product
    # name.
    T0 = dates.datestr2num('1950-01-01 00:00:00 UTC')
    var = self.params['product'].upper()

    # First we have to select which files will be loaded, which
    # depends on the temporal limits given in 'tlim'.
    sel_time = flatnonzero((T >= floor(min(tlim))) &
                           (T <= ceil(max(tlim))))
    N = len(sel_time)

    # Second we walk through each of the selected times in the dataset
    # and load the corresponding file for the available missions.
    t1 = time()
    if profile:
        s = '\rLoading data...'
        stdout.write(s)
        stdout.flush()
    # Resets important variables.
    TIME, LAT, LON, VAR, MISSION = [array([])] * 5
    #
    for i, tm in enumerate(T[sel_time]):
        t2 = time()
        for (mission, fname) in self.attributes['time_dataset'][tm]:
            # Skips missions not in the missions list.
            if mission not in missions:
                continue
            # Uncompresses gzipped file and opens NetCDF instance.
            data = self.read_file('%s/%s/%s' % (self.params['path'],
                                                mission, fname))
            # Retrieves the scale factor for each variable. Note that
            # the longitude scale factor comes from the 'longitude'
            # variable, not from 'latitude'.
            scale_lat = data.variables['latitude'].scale_factor
            scale_lon = data.variables['longitude'].scale_factor
            scale_dat = data.variables[var].scale_factor
            # Gets the raw time, latitude and longitude.
            raw_time = data.variables['time'].data + T0
            raw_lat = data.variables['latitude'].data * scale_lat
            raw_lon = data.variables['longitude'].data * scale_lon
            # Selects relevant data range according to limit
            # parameters. Records have to satisfy the meridional and
            # zonal limits simultaneously, so the selection starts
            # all-true and is intersected with each limit.
            sel_from_time = ((raw_time >= min(tlim)) &
                             (raw_time <= max(tlim)))
            sel_from_limits = ones(data.dimensions['time'], dtype=bool)
            if ylim is not None:
                sel_from_limits = (sel_from_limits &
                                   ((raw_lat >= min(ylim)) &
                                    (raw_lat <= max(ylim))))
            if xlim is not None:
                sel_from_limits = (sel_from_limits &
                                   ((raw_lon >= min(xlim)) &
                                    (raw_lon <= max(xlim))))
            # Selects relevant data according to points and search
            # radius.
            sel_from_radius = zeros(data.dimensions['time'],
                                    dtype=bool)
            for xx, yy in zip(x, y):
                distance2 = ((raw_lat - yy) ** 2 +
                             (raw_lon - lon360(xx)) ** 2)
                sel_from_radius = (sel_from_radius |
                                   (distance2 <= radius2))
            #
            sel_data = flatnonzero(sel_from_time &
                                   (sel_from_limits |
                                    sel_from_radius))
            _time = raw_time[sel_data]
            _lat = raw_lat[sel_data]
            _lon = raw_lon[sel_data]
            _dat = data.variables[var].data[sel_data] * scale_dat
            #
            TIME = append(TIME, _time)
            LAT = append(LAT, _lat)
            LON = append(LON, _lon)
            VAR = append(VAR, _dat)
            MISSION = append(MISSION, [mission] * len(sel_data))
            #
            self.close_file(data)
        #
        # Profiling.
        if profile:
            s = '\rLoading data... %s ' % (profiler(N, i + 1, t0, t1,
                                                    t2),)
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()

    # Converts the data to a structured array.
    DAT = rec.fromarrays((TIME, LAT, LON, VAR, MISSION),
                         dtype=[('time', float64),
                                ('latitude', float64),
                                ('longitude', float64),
                                (self.params['product'], float64),
                                ('mission', '|S3')])

    # Some data sorting?
    if sort:
        DAT.sort(order=('time', 'latitude', 'longitude'), axis=0)

    return DAT
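# Usage sketch (hypothetical instance and start time; the data field
# of the record array is named after the 'product' parameter, 'sla'
# here):
#
#     dat = dataset.read(tlim=(tstart, tstart + 30.),
#                        ylim=(-5., 5.), xlim=(180., 220.))
#     mean_sla = dat['sla'].mean()
#     jason2 = dat[dat['mission'] == 'j2']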