def get_vcm(self, mask):
    # mask = 'cal333+cal05+cal20+cal80+csat' for instance
    if '+' in mask:
        names = mask.split('+')
        to_read = [name for name in names if name not in self.data]
        ds = da.read_nc(self.filename, to_read, axis='tai_time',
                        verbose=self.verbose)
        for name in to_read:
            self.data[name] = ds[name].values
        if 'csat' in names:
            # negative data can happen e.g. for csat when there are no files.
            # it does *not* happen when there are no colocated profiles,
            # as far as I know. need to do something better than that?
            self.data['csat'] = np.clip(self.data['csat'], 0, 3)
        # copy so the in-place additions below do not corrupt the cache
        output = self.data[names[0]].copy()
        for name in names[1:]:
            output += self.data[name]
    else:
        if mask not in self.data:
            self.data[mask] = da.read_nc(self.filename, mask,
                                         axis='tai_time').values
            if 'csat' in mask:
                self.data[mask] = np.clip(self.data[mask], 0, 3)
        output = self.data[mask]
    output = np.clip(output, 0, 3)
    return output
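# A minimal, self-contained sketch of the '+'-mask convention used by
# get_vcm above: the named flag arrays are summed, then clipped to [0, 3].
# The arrays below are synthetic stand-ins for the netCDF variables.
import numpy as np

flags = {'cal333': np.array([0, 1, 2]), 'csat': np.array([3, 0, 2])}
names = 'cal333+csat'.split('+')
combined = np.clip(sum(flags[name] for name in names), 0, 3)
print(combined)  # -> [3 1 3]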
def test_format(dim_array, tmpdir):
    fname = tmpdir.join("test.nc").strpath  # have test.nc in some temporary directory
    a = DimArray([1, 2], dims=['xx0'])
    b = DimArray([3., 4., 5.], dims=['xx1'])
    a.write_nc(fname, "a", mode='w')
    b.write_nc(fname, "b", mode='a')
    # b.write_nc(fname.replace('.nc','netcdf3.nc'), name="b", mode='w', format='NETCDF3_CLASSIC')
    ds = da.Dataset([('a', a), ('b', b)])

    # NETCDF3_CLASSIC
    ds.write_nc(fname.replace('.nc', 'netcdf3.nc'), mode='w',
                format='NETCDF3_CLASSIC')
    dscheck = da.read_nc(fname.replace('.nc', 'netcdf3.nc'))
    assert_equal_datasets(ds, dscheck)

    # # NETCDF3_64bit
    # ds.write_nc(fname.replace('.nc','netcdf3_64b.nc'), mode='w', format='NETCDF3_64BIT')
    # dscheck = da.read_nc(fname.replace('.nc','netcdf3_64b.nc'))
    # assert_equal_datasets(ds, dscheck)

    data = read_nc(fname)
    assert(np.all(data['a'] == a))
    assert(np.all(data['b'] == b))
    ds = da.Dataset(a=a, b=b)
    for k in ds:
        assert(np.all(ds[k] == data[k]))
def my_read_vcm(f=f, verbose=False):
    filename = f
    lon = da.read_nc(filename, 'lon', verbose=verbose).values
    lat = da.read_nc(filename, 'lat', verbose=verbose).values
    data = da.Dataset()
    data['cal333'] = da.read_nc(filename, 'cal333', verbose=verbose)
    altitude = data['cal333'].altitude
def main(window):
    window = int(window)
    years = range(2006, 2015)
    fullvcm = []
    fullnprof = []
    for year in years:
        mask = 'out.{:02d}/{:04d}/*.nc4'.format(window, year)
        print mask
        try:
            vcm = da.read_nc(mask, vcm_name, axis='file')
        except ValueError:
            print 'No monthlies for {:04d}, skipping'.format(year)
            continue
        # vcm.reset_axis(filename_to_datetime, 'file')
        nprof = da.read_nc(mask, 'nprof', axis='file')
        # nprof.reset_axis(filename_to_datetime, 'file')
        fullvcm.append(vcm)
        fullnprof.append(nprof)
    vcm = da.concatenate(fullvcm, axis='file')
    nprof = da.concatenate(fullnprof, axis='file')
    print vcm, nprof
    np.savez('series_%d.npz' % window,
             vcm=vcm, nprof=nprof,
             altmin=vcm.labels[1], time=vcm.labels[0], lon=vcm.labels[2])
def sum_arrays_from_files(filemask, array_names=None):
    if array_names is None:
        datasets = da.read_nc(filemask, axis='day')
    else:
        datasets = da.read_nc(filemask, array_names, axis='day')
    datasets = datasets.sum(axis='day')
    return datasets
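# Hedged usage sketch for sum_arrays_from_files: the glob pattern and the
# variable name below are hypothetical; 'day' must be the axis along which
# da.read_nc stacks the files.
totals = sum_arrays_from_files('out/200607/vcm_*.nc4', array_names=['nprof'])
print(totals['nprof'])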
def main(mask='out/200607/*nc4'):
    dset = da.read_nc(mask, ['lat', 'cloudpts'], axis='tai_time')
    npts = dset['cloudpts']
    lat = dset['lat']
    h_cprof, xx = np.histogram(lat, latbins, weights=1. * (npts > 0))
    h_nprof, xx = np.histogram(lat, latbins)
    fraction = h_cprof / h_nprof

    plt.figure(figsize=[10, 3])
    plt.plot(latbins[:-1], 100. * fraction, lw=0.5)
    plt.grid()
    plt.xlabel('Latitude')
    plt.ylabel('Percents')
    plt.title('Fraction of cloudsat-only cloudy profiles')

    npts, xx = np.histogram(lat, latbins, weights=npts)
    npts_per_prof = 1. * npts / (h_cprof * 3)

    plt.figure(figsize=[10, 3])
    plt.plot(latbins[:-1], npts_per_prof, lw=0.5)
    plt.grid()
    plt.xlabel('Latitude')
    plt.title('Number of cloudy points in cloudsat-only cloudy profiles')

    plt.show()
def gather(mask):
    import glob
    flist = glob.glob(mask)
    # accumulate across files, so the dicts live outside the file loop
    cpts = dict()
    nprof = dict()
    for f in flist:
        print f
        dset = da.read_nc(f, ["csat", "lat", "cloudpts"])
        csat = dset["csat"].values
        lat = dset["lat"].values
        altitude = dset["csat"].altitude
        idx = dset["cloudpts"].values > 0
        del dset
        for l in lats:
            # profiles in the latitude band, in both hemispheres
            idx1 = np.where((lat >= lats[l][0]) & (lat < lats[l][1]))[0]
            idx2 = np.where((lat >= -lats[l][1]) & (lat < -lats[l][0]))[0]
            idx = np.concatenate([idx1, idx2])
            if l in cpts:
                nprof[l] = nprof[l] + idx.shape[0]
                cpts[l] = cpts[l] + np.take(csat, idx, axis=0).sum(axis=0)
            else:
                nprof[l] = idx.shape[0]
                cpts[l] = np.take(csat, idx, axis=0).sum(axis=0)
    cprofl = dict()
    for l in lats:
        cprofl[l] = 100.0 * cpts[l] / nprof[l]
    return cprofl, altitude
def aggregate_arrays_from_files(files, array_name, summed_along=None):
    aggregated = None
    files.sort()
    prevmax = 0
    for f in files:
        data = da.read_nc(f)
        if array_name not in data:
            continue
        array = data[array_name]
        if summed_along is not None:
            array = array.sum(axis=summed_along)
        if aggregated is None:
            aggregated = 1. * array
        else:
            aggregated += array
        # the aggregated maximum can only grow; a decrease means bad input data
        if aggregated.max() < prevmax:
            print 'PROBLEM!'
            print 'Previous maximum = ', prevmax, ', current max = ', aggregated.max()
        prevmax = aggregated.max()
    return aggregated
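# Hedged usage sketch for aggregate_arrays_from_files (the glob pattern and
# variable name are hypothetical); files are read one at a time, so memory
# use stays bounded even for long file lists.
import glob

files = glob.glob('out/200607/*.nc4')
total = aggregate_arrays_from_files(files, 'cal333+cal05+cal20+cal80+csat',
                                    summed_along='altitude')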
def show_file(filename, title):
    data = da.read_nc(filename)
    vcm05 = data['vcm_cal333']
    vcm_lonlat = 1. * vcm05.sum(axis='altitude') / data['nprof']
    pcolor_vcm(vcm_lonlat, title)
def compound_precip_temp_index(combinations, out_file):
    """
    Combines existing binary state files into compound indices

    Parameters
    ----------
    combinations: dict
        maps a compound index name to a list of (state_file, variable_name)
        pairs; the stored byte value is 1 where all conditions are fulfilled
    out_file: str
        filepath of the output netCDF file
    """
    out = {}
    for name, conditions in combinations.items():
        conds = []
        description = []
        for condition in conditions:
            nc = da.read_nc(condition[0])
            conds.append(nc[condition[1]].squeeze())
            description.append(nc[condition[1]].description)
        compound_state = conds[0].copy()
        compound_state[:] = False
        for cond in conds:
            compound_state += cond
        compound_state /= len(conds)
        out[name] = da.DimArray(np.array(compound_state.values, dtype=np.byte),
                                axes=compound_state.axes,
                                dims=compound_state.dims,
                                dtype=np.byte)
        out[name].description = ' AND '.join(description)
    da.Dataset(out).write_nc(out_file)
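# Hedged usage sketch for compound_precip_temp_index: each entry maps a
# compound index name to (state_file, variable) pairs. The file names and
# variable names here are hypothetical.
combinations = {
    'warm_dry': [('tas_states.nc', 'warm'), ('pr_states.nc', 'dry')],
}
compound_precip_temp_index(combinations, 'compound_states.nc')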
def main(mask='out/200607/*nc4'):
    dset = da.read_nc(mask, ['lon', 'lat', 'cloudpts'], axis='tai_time')
    cloudy = 1 * (dset['cloudpts'] > 0)
    h, xx, yy = np.histogram2d(dset['lon'].values, dset['lat'].values,
                               bins=[lonbins, latbins], weights=cloudy)
    nprof, xx, yy = np.histogram2d(dset['lon'].values, dset['lat'].values,
                                   bins=[lonbins, latbins])
    map_show(lonbins, latbins, h, nprof)
def gather_info_track(self, overwrite=False):
    out_file = self._working_dir + 'surrounding_info.nc'
    if overwrite and os.path.isfile(out_file):
        os.system('rm ' + out_file)
    elif overwrite == False and os.path.isfile(out_file):
        self._track_info = da.read_nc(out_file)
        return self._track_info

    track_info = {}
    for id_, track in self._tcs.items():
        track = track[np.isfinite(track[:, 't']), :]
        info = np.zeros([6, track.shape[0],
                         self._win2 * 2 + 1, self._win2 * 2 + 1]) * np.nan
        for i, p in enumerate(track.values.tolist()):
            t = int(p[0])
            box_2 = [int(bb) for bb in self.get_box(p[1], p[2], self._win2)]
            # where the (possibly clipped) box sits inside the output window
            ys = slice(abs(p[1] - box_2[0] - 12), box_2[1] - p[1] + 12)
            xs = slice(abs(p[2] - box_2[2] - 12), box_2[3] - p[2] + 12)
            # the box itself in field coordinates
            yb = slice(box_2[0], box_2[1])
            xb = slice(box_2[2], box_2[3])
            info[0, i, ys, xs] = self._VO[t, yb, xb]
            info[1, i, ys, xs] = self._MSLP[t, yb, xb]
            info[2, i, ys, xs] = self._Wind10[t, yb, xb]
            if self._SST is not None:
                info[3, i, ys, xs] = self._SST[t, yb, xb]
            if self._T is not None:
                info[4, i, ys, xs] = self._T[t, 0, yb, xb]
                info[5, i, ys, xs] = self._T[t, 1, yb, xb]
        track_info[str(id_)] = da.DimArray(
            info,
            axes=[['VO', 'MSLP', 'Wind10', 'SST', 'T850', 'T500'],
                  range(len(track.time)),
                  range(self._win2 * 2 + 1),
                  range(self._win2 * 2 + 1)],
            dims=['variable', 'time_id', 'lat', 'lon'])
    self._track_info = da.Dataset(track_info)
    self._track_info.write_nc(out_file, mode='w')
def precip_to_index(
        in_file, out_file, var_name='pr', unit_multiplier=1,
        states={'dry': {'mod': 'below', 'threshold': 1},
                'wet': {'mod': 'above', 'threshold': 1},
                '5mm': {'mod': 'above', 'threshold': 5},
                '10mm': {'mod': 'above', 'threshold': 10}}):
    """
    Classifies daily precipitation into binary states such as 'wet' and 'dry' days

    Parameters
    ----------
    in_file: str
        filepath of a daily precipitation file. The variable that is read in
        can be specified with `var_name`.
    out_file: str
        filepath of the output state file
    var_name: str
        name of the variable read from `in_file`
    unit_multiplier: float, default=1
        factor applied to daily precipitation to convert it to mm
    states: dict
        maps a state name to a {'mod': 'above'|'below', 'threshold': float}
        rule evaluated against daily precipitation in mm
    """
    nc = da.read_nc(in_file)
    pr = nc[var_name].squeeze() * unit_multiplier
    out = {}
    for name, state_dict in states.items():
        state = nc[var_name].squeeze().copy()
        state[:] = False
        if state_dict['mod'] == 'above':
            state[pr >= state_dict['threshold']] = True
        if state_dict['mod'] == 'below':
            state[pr <= state_dict['threshold']] = True
        out[name] = da.DimArray(np.array(state.values, dtype=np.byte),
                                axes=state.axes, dims=state.dims,
                                dtype=np.byte)
        out[name].description = ('days with precipitation ' + state_dict['mod'] +
                                 ' ' + str(state_dict['threshold']) + 'mm')
    da.Dataset(out).write_nc(out_file)
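# Hedged usage sketch for precip_to_index: file paths are hypothetical, and
# the 86400 multiplier assumes the input precipitation is a flux in
# kg m-2 s-1 that must be converted to mm/day.
precip_to_index('pr_daily.nc', 'pr_states.nc', var_name='pr',
                unit_multiplier=86400)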
def dayfiles_to_windows(files, outfile):
    window_sum = da.read_nc(files, axis='day', verbose=False)
    window_sum = window_sum.sum(axis='day')
    cf = 100. * np.sum(window_sum['cal333+cal05+cal20+cal80+csat_cprof']) / np.sum(window_sum['nprof'])
    if cf < 60:
        print 'WARNING: cf < 60%'
    window_sum.write_nc(outfile, 'w')
def test_redundant_axis(tmpdir):
    # see test in test_dataset.py
    ds = da.Dataset()
    ds["myaxis"] = da.DimArray([10, 20, 30], da.Axis([10, 20, 30], 'myaxis'))
    assert list(ds.keys()) == ["myaxis"]
    assert ds.dims == ("myaxis",)

    fname = tmpdir.join("test_redundant_axis.nc").strpath  # in some temporary directory
    ds.write_nc(fname)

    # read whole dataset: variable not counted as variable
    ds = da.read_nc(fname)
    assert list(ds.keys()) == []
    assert ds.dims == ("myaxis",)

    # specify dimension variable
    ds = da.read_nc(fname, ["myaxis"])
    assert list(ds.keys()) == ["myaxis"]
    assert ds.dims == ("myaxis",)
def __init__(self, filename, verbose=True):
    self.filename = filename
    self.verbose = verbose
    dimarrays = da.read_nc(filename, ['lon', 'lat', 'cal333'],
                           axis='tai_time', verbose=verbose)
    self.altitude = dimarrays['cal333'].altitude
    self.lon = dimarrays['lon'].values
    self.lat = dimarrays['lat'].values
    self.data = {'cal333': dimarrays['cal333'].values}
    self.time = dimarrays['cal333'].tai_time
def detect_dieng(self, overwrite=False, dis_VO_max=8, min_number_cells=6,
                 thr_VO=1 * 10**(-5), thr_RH=50):
    out_file = self._working_dir + str(self._identifier) + '_detected_positions.nc'
    if overwrite and os.path.isfile(out_file):
        os.system('rm ' + out_file)
    elif overwrite == False and os.path.isfile(out_file):
        self._detected = da.read_nc(out_file)['detected']
        return self._detected

    # convert distances from degrees into grid-cells
    dis_VO_max = self.degree_to_step(dis_VO_max)

    detect = np.array([[np.nan] * 6])
    print('detecting\n10------50-------100')
    for t, progress in zip(
            self._time_i,
            np.array([['-'] + [''] * (len(self._time_i) / 20 + 1)] * 20
                     ).flatten()[0:len(self._time_i)]):
        sys.stdout.write(progress)
        sys.stdout.flush()
        coords = peak_local_max(self._VO[t, :, :], min_distance=int(dis_VO_max))
        for y_, x_ in zip(coords[:, 0], coords[:, 1]):
            if self._VO[t, y_, x_] > thr_VO:
                yy, xx = self.find_group(field=self._VO[t, :, :],
                                         y=y_, x=x_, thresh=thr_VO)
                if len(yy) >= min_number_cells:
                    if self._RH[t, y_, x_] >= thr_RH and self._lat[y_] < 35:
                        # y_, x_ = sum(yy) / len(yy), sum(xx) / len(yy)
                        tmp = [t, y_, x_, self._VO[t, y_, x_],
                               self._RH[t, y_, x_], len(yy)]
                        detect = np.concatenate((detect, np.array([tmp])))
    self._detected = da.DimArray(
        np.array(detect[1:, :]),
        axes=[range(detect.shape[0] - 1),
              ['t', 'y', 'x', 'VO', 'RH', 'members']],
        dims=['ID', 'z'])
    da.Dataset({'detected': self._detected}).write_nc(out_file, mode='w')
    print('\ndone')
    return self._detected
def window_cloud_ceiling(f):
    dset = da.read_nc(f, ["cal333+cal05+cal20+cal80+csat",
                          "cal333+cal05+cal20+cal80+csat_cprof",
                          "nprof"])
    vcm, cprof, nprof = (
        dset["cal333+cal05+cal20+cal80+csat"],
        dset["cal333+cal05+cal20+cal80+csat_cprof"],
        dset["nprof"],
    )
    cf_lat, cf_lat2 = compute_cf(vcm, cprof, nprof)
    ceiling = tropic_width.cloud_cover_top(vcm.altitude, cf_lat)
    return ceiling, vcm.lat, cprof, nprof
def show_file(input, title):
    data = da.read_nc(input)
    vcm_prof = data['cal333+cal05+cal20+cal80+csat']
    nprof = data['cal333+cal05+cal20+cal80+csat_cprof']
    cf_lat = 1. * vcm_prof.values.T / nprof.values
    cf_lat = cf_lat.T
    cf_lat = np.ma.masked_invalid(cf_lat)
    pcolor_zonal(vcm_prof.labels[0], vcm_prof.labels[1], cf_lat,
                 'Cloud fraction ' + title)
def main(mask='out/200607/*.nc4'):
    print mask
    files = glob.glob(mask)
    print len(files)
    for f in files:
        d = da.read_nc(f)
        print f
        for n in ('cal333+cal05+cal20+cal80+csat_cprof',
                  'cal333+cal05+cal20+cal80_cprof'):
            cf = 100. * d[n].sum() / d['nprof'].sum()
            print ' ', n, d['nprof'].sum(), d[n].sum(), cf
def test_standalone_axis(tmpdir):
    # see test in test_dataset.py
    ds = da.Dataset()
    ds.axes.append(da.Axis([10, 20, 30], 'myaxis'))
    assert list(ds.keys()) == []
    assert ds.dims == ("myaxis",)

    fname = tmpdir.join("test_standalone_axis.nc").strpath  # in some temporary directory
    ds.write_nc(fname)

    # read again
    ds = da.read_nc(fname)
    assert list(ds.keys()) == []
    assert ds.dims == ("myaxis",)
def temp_anomaly_to_ind_old(anom_file, out_file, var_name='tas',
                            seasons={'MAM': [3, 4, 5],
                                     'JJA': [6, 7, 8],
                                     'SON': [9, 10, 11],
                                     'DJF': [12, 1, 2]},
                            overwrite=True):
    """
    Classifies daily temperature anomalies into 'cold' and 'warm' days using
    the season and grid-cell specific median as threshold

    Parameters
    ----------
    anom_file: str
        filepath of a temperature anomalies file. The variable that is read
        in can be specified with `var_name`.
    out_file: str
        filepath of the output state file
    var_name: str
        name of the variable read from `anom_file`
    seasons: dict, default=`{'MAM': [3,4,5], 'JJA': [6,7,8], 'SON': [9,10,11], 'DJF': [12,1,2]}`
        dictionary mapping each season name to its three months
    overwrite: bool
        overwrites existing files
    """
    nc = da.read_nc(anom_file)
    if 'calendar' in nc['time'].attrs.keys():
        datevar = num2date(nc['time'].values, units=nc['time'].units,
                           calendar=nc['time'].calendar)
    else:
        datevar = num2date(nc['time'].values, units=nc['time'].units)
    month = np.array([date.month for date in datevar])

    anom = nc[var_name].squeeze()
    state = nc[var_name].squeeze().copy() * np.nan
    for season in seasons.keys():
        days_in_season = np.where((month == seasons[season][0]) |
                                  (month == seasons[season][1]) |
                                  (month == seasons[season][2]))[0]
        seasonal_median = np.nanmedian(anom.ix[days_in_season, :, :], axis=0)
        anom.ix[days_in_season, :, :] -= seasonal_median

    state[anom >= 0] = 1
    state[anom < 0] = -1

    if overwrite:
        os.system('rm ' + out_file)
    state.description = ('daily anomalies - seasonal median of daily anomalies '
                         'at grid cell level. positive anomalies -> 1, '
                         'negative anomalies -> -1')
    da.Dataset({'state': state}).write_nc(out_file)
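# Hedged usage sketch for temp_anomaly_to_ind_old (file paths hypothetical):
# classifies each day as warm (1) or cold (-1) relative to the seasonal
# median anomaly of its grid cell.
temp_anomaly_to_ind_old('tas_anomalies.nc', 'tas_states.nc', var_name='tas')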
def main(mask='out/200607/*nc4'):
    dset = da.read_nc(mask, ['cloudpts'], axis='tai_time')
    cpts = dset['cloudpts']
    cpts = cpts / 3.
    print 'Range of cloudy points in cloudsat-only cloudy profiles : ', np.min(cpts.values), np.max(cpts.values)
    plt.figure()
    cpts.plot(lw=0.5)
    plt.figure()
    plt.hist(cpts[cpts > 0].values, 20)
    plt.show()
def month_tropic_width(f):
    dset = da.read_nc(f, ["cal333+cal05+cal20+cal80+csat",
                          "cal333+cal05+cal20+cal80+csat_cprof"])
    vcm = dset["cal333+cal05+cal20+cal80+csat"]
    nprof = dset["cal333+cal05+cal20+cal80+csat_cprof"]
    cf_lat = np.ma.masked_invalid(1.0 * vcm.values.T / nprof.values)
    cf_lat = cf_lat.T
    tropic_range = dict()
    for vcm_min in vcm_mins:
        tropic_range[vcm_min] = tropic_width.tropic_width3(
            vcm.lat, vcm.altitude, cf_lat, vcm_min)
    return tropic_range
def cf_zonal(filename):
    data = da.read_nc(filename)
    try:
        vcm_prof = data['cal333+cal05+cal20+cal80+csat']
        nprof = data['cal333+cal05+cal20+cal80+csat_cprof']
    except KeyError:
        return None, None, None
    cf_lat = 1. * vcm_prof.values.T / nprof.values
    cf_lat = cf_lat.T
    cf_lat = np.ma.masked_invalid(cf_lat)
    return vcm_prof.labels[0], vcm_prof.labels[1], cf_lat
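# Hedged usage sketch for cf_zonal (the monthly file path is hypothetical):
# plot the zonal cloud fraction when the expected variables are present.
import matplotlib.pyplot as plt

lat, alt, cf_lat = cf_zonal('out/200607/zonal_200607.nc4')
if cf_lat is not None:
    plt.pcolormesh(lat, alt, cf_lat.T)
    plt.show()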
def ttest_roundtrip_datetime(axis, tmpdir):
    # disabled test (note the leading 't' in the name)
    a = da.DimArray(np.arange(axis.size), axes=[axis])

    # write
    fname = tmpdir.join("test_datetime.nc").strpath  # in some temporary directory
    a.write_nc(fname, 'myarray')

    # read-back
    actual = da.read_nc(fname, 'myarray')

    if axis.dtype.kind == "M":
        pass  # TODO: convert axis to datetime
    else:
        assert_equal_dimarrays(actual, expected=a)
def setUp(self):
    ncfile = da.get_ncfile('greenland_velocity.nc')
    try:
        self.grl = da.read_nc(ncfile)
    except:
        self.grl = None
        warnings.warn('could not read netCDF: no test_coords')
        return
    grl = self.grl
    self.vmag = grl['surfvelmag']
    self.vx = grl['surfvelx']
    self.vy = grl['surfvely']
    self.lon = grl['lon']
    self.lat = grl['lat']
    self.mapping = grl['mapping']._metadata()
def get_atl_tcs(file='/Users/peterpfleiderer/Projects/tropical_cyclones/data/Allstorms.ibtracs_all.v03r10.nc'):
    TC = da.read_nc(file)
    # select north atlantic basin
    tc_sel = TC.ix[TC['basin'][:, 0] == 0]
    # select time period
    tc_sel = tc_sel.ix[tc_sel['season'] >= 1900, :]
    # select main tracks
    tc_sel = tc_sel.ix[tc_sel['track_type'] == 0, :]

    tc_lat = tc_sel['lat_for_mapping']
    tc_lon = tc_sel['lon_for_mapping']
    # tc_sel = tc_sel.ix[np.where(tc_sel_cat > 0)]
    tmp_time = tc_sel['source_time']
    tc_year, tc_month, tc_yrmn, tc_yrFr = (tmp_time.copy(), tmp_time.copy(),
                                           tmp_time.copy(), tmp_time.copy())
    for storm in tmp_time.storm:
        for tt in tmp_time.time:
            if np.isfinite(tmp_time[storm, tt]):
                datevar = num2date(tmp_time[storm, tt], units=tmp_time.units)
                tc_year[storm, tt] = datevar.year
                tc_month[storm, tt] = datevar.month
                tc_yrmn[storm, tt] = datevar.year + float(datevar.month - 1) / 12.
                tc_yrFr[storm, tt] = toYearFraction(datevar)

    # remove extratropical time steps (and disturbances):
    # keep only time steps whose nature is 2 or 3
    not_tropical = ((tc_sel['nature_for_mapping'] != 2) &
                    (tc_sel['nature_for_mapping'] != 3))
    tc_wind = tc_sel['source_wind'].ix[:, :, 0]
    tc_wind.ix[not_tropical] = np.nan
    tc_pres = tc_sel['source_pres'].ix[:, :, 0]
    tc_pres.ix[not_tropical] = np.nan

    ds = da.Dataset({
        'wind': tc_wind,
        'mslp': tc_pres,
        'lat': tc_lat,
        'lon': tc_lon,
        'time': tc_sel['source_time'],
        'year': tc_year,
        'month': tc_month,
        'yrmn': tc_yrmn,
        'yrFr': tc_yrFr,
    })
    return ds
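# Hedged usage sketch for get_atl_tcs: assumes the IBTrACS file in the
# default argument exists locally.
ds = get_atl_tcs()
print(len(ds['wind'].storm))  # number of selected North Atlantic storms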
def integrate(self, years, out_dir=None, **kwargs):
    """Run the model as if it were run from the command line

    years : float
        years of simulation
    out_dir : str, optional
        provide an output directory for the simulation (otherwise a new
        directory will be created)
    **kwargs : passed to run_model
        This includes 'dt' (time step) etc...
    """
    if out_dir:
        self.out_dir = out_dir
    else:
        self.out_dir = self._get_out_dir(create=True)  # new out directory
        self._instance_directories.append(self.out_dir)
        self._class_directories.append(self.out_dir)

    # write glacier state to disk
    in_file = self._get_in_file()
    self.to_dataset().write_nc(in_file)

    # and use it both as a restart file and reference geometry file
    res = run_model(years, params=self.params, out_dir=self.out_dir,
                    in_file=in_file, rst_file=in_file, **kwargs)
    if res != 0:
        warnings.warn("result value is not 0. Problem during integration?")
        return None

    # read results from restart file
    ds = da.read_nc(os.path.join(self.out_dir, 'restart.nc'))
    gl = self.from_dataset(ds)
    # read results from output file, whose geometry extends further
    # ds = self.read_output(os.path.join(self.out_dir, 'output.nc'))
    gl.params = self.params  # copy params
    return gl
def month_tropic_width(monthfile):
    try:
        data = da.read_nc(monthfile)
    except:
        return None
    try:
        vcm = 1. * data['cal333+cal05+cal20+cal80+csat']
        nprof = 1. * data['cal333+cal05+cal20+cal80+csat_cprof']
    except KeyError:
        return None
    cf_lat = vcm.values.T / nprof.values
    cf_lat = cf_lat.T
    cf_lat = np.ma.masked_invalid(cf_lat)
    tropic_range = dict()
    for vcm_min in vcm_mins:
        tropic_range[vcm_min] = tropic_width3(vcm.labels[0], vcm.labels[1],
                                              cf_lat, vcm_min=vcm_min)
    return tropic_range
def aggregate_arrays_from_files(files, array_name, summed_along=None, bounds=None):
    aggregated = None
    files.sort()
    for f in files:
        data = da.read_nc(f)
        if array_name not in data:
            continue
        array = data[array_name]
        if summed_along is not None:
            array = array.sum(axis=summed_along)
        if aggregated is None:
            aggregated = 1. * array
        else:
            aggregated += array
        print aggregated.max()
    return aggregated
import os, sys, glob, time, collections, gc, itertools, timeit
import numpy as np
from netCDF4 import Dataset, netcdftime, num2date
import dimarray as da

sys.path.append('/Users/peterpfleiderer/Documents/Projects/HAPPI_persistence/persistence_in_models/')
import cython_function

sys.path.append('/global/homes/p/pepflei/weather_persistence/')
sys.path.append('/Users/peterpfleiderer/Documents/Projects/weather_persistence/')
from persistence_functions import *

period = da.read_nc('data/tests/tas_Aday_ECHAM6-3-LR_Plus20-Future_CMIP5-MMM-est1_v2-0_run010_period.nc')
tas = da.read_nc('data/tests/tas_Aday_ECHAM6-3-LR_Plus20-Future_CMIP5-MMM-est1_v2-0_run010.nc')['tas']

mask = da.read_nc('data/tests/landmask_96x192_NA-1.nc')['landmask']
mask[mask != 1] = 0
mask = np.asarray(mask, np.int32)

# land_yx = [(period.lat[np.argmin(abs(lat - period.lat))], period.lon[np.argmin(abs(lon - period.lon))]) for lat, lon in itertools.product(mask.lat, mask.lon) if mask[lat, lon] == 1]
# coord = np.array([(np.argmin(abs(lat - period.lat)), np.argmin(abs(lon - period.lon))) for lat, lon in itertools.product(mask.lat, mask.lon) if mask[lat, lon] == 1], np.int32)

# Ni = len(period.period_id)
# Ny = len(period.lat)
# Nx = len(period.lon)
# periods_of_interest = np.array([[i, y, x] for i, y, x in itertools.product(range(Ni), range(Ny), range(Nx)) if period['period_season'].ix[i, y, x] == 1])

def period_analysis_py(ids, lats_lons):
    # (snippet truncated below)
    for i, y, x in itertools.product(range(Ni), range(Ny), range(Nx)):
        ll = period['period_length'][per_id, lat_lon[0], lat_lon[1]]
        if ll > 0 & period['period_season'][per_id, lat_lon[0], lat_lon[1]] == 1:
        '/p/projects/ikiimp/HAPPI/HAPPI_Peter/persistence_in_HAPPI/')
    os.chdir('/p/projects/ikiimp/HAPPI/HAPPI_Peter/')
    in_path = '/p/tmp/pepflei/HAPPI/raw_data/EKE/' + model + '/'
    out_path = '/p/tmp/pepflei/HAPPI/raw_data/reg_merge/' + model + '/'
    home_path = '/p/projects/ikiimp/HAPPI/HAPPI_Peter/persistence_in_HAPPI/'
except:
    sys.path.append('/global/homes/p/pepflei/persistence_in_models/')
    os.chdir('/global/homes/p/pepflei/')
    working_path = '/global/cscratch1/sd/pepflei/EKE/' + model + '/'
    home_path = '/global/homes/p/pepflei/persistence_in_models/'

import __settings
model_dict = __settings.model_dict

masks = da.read_nc('masks/srex_mask_' + model_dict[model]['grid'] + '.nc')

all_files = sorted(glob.glob(in_path + scenario + '/monEKE*_' + scenario + '*.nc'))

big_merge = {}
big_merge['eke'] = da.read_nc(all_files[0])['eke'][:, 0:, :]
big_merge['run_id'] = da.read_nc(all_files[0])['eke'][:, 0:, :].copy()
big_merge['run_id'].values = 0
for i_run, file_name in enumerate(all_files[1:]):
    print(file_name)
    big_merge['eke'] = da.concatenate((big_merge['eke'],
                                       da.read_nc(file_name)['eke'][:, 0:, :]))
    tmp = da.read_nc(file_name)['eke'][:, 0:, :].copy()
    tmp.values = i_run + 1
import os, sys, glob, time, collections, gc
import numpy as np
from netCDF4 import Dataset, netcdftime, num2date
import matplotlib.pylab as plt
import dimarray as da
import itertools
import matplotlib
import pandas as pd
import seaborn as sns

sns.set()
plt.rc('font', family='Calibri')

os.chdir('/Users/peterpfleiderer/Documents/Projects/gmt/gmt_method_sensitivities')

gmt_cowtan = da.read_nc('../data/gmt_all_cowtan.nc')['gmt']
gmt_all = da.read_nc('data/gmt_reproducedErrors.nc')['gmt']

# select special runs
all_model_runs = [
    u'ACCESS1-0_r1i1p1', u'ACCESS1-3_r1i1p1', u'CCSM4_r1i1p1',
    u'CESM1-BGC_r1i1p1', u'CESM1-CAM5_r1i1p1', u'CMCC-CMS_r1i1p1',
    u'CMCC-CM_r1i1p1', u'CNRM-CM5_r1i1p1', u'CSIRO-Mk3-6-0_r1i1p1',
    u'CanESM2_r1i1p1', u'EC-EARTH_r1i1p1', u'GFDL-CM3_r1i1p1',
    u'GFDL-ESM2G_r1i1p1', u'GFDL-ESM2M_r1i1p1', u'GISS-E2-H-CC_r1i1p1',
    u'GISS-E2-H_r1i1p1', u'GISS-E2-R-CC_r1i1p1', u'GISS-E2-R_r1i1p1',
    u'HadGEM2-AO_r1i1p1', u'HadGEM2-CC_r1i1p1', u'HadGEM2-ES_r1i1p1',
    u'IPSL-CM5A-LR_r1i1p1', u'IPSL-CM5A-MR_r1i1p1', u'IPSL-CM5B-LR_r1i1p1',
    u'MIROC-ESM-CHEM_r1i1p1', u'MIROC-ESM_r1i1p1', u'MIROC5_r1i1p1',
    u'MPI-ESM-LR_r1i1p1', u'MPI-ESM-MR_r1i1p1', u'MRI-CGCM3_r1i1p1',
    u'MRI-ESM1_r1i1p1', u'NorESM1-ME_r1i1p1', u'NorESM1-M_r1i1p1']

# tos issue models
tos_issues = [
    u'EC-EARTH_r1i1p1', u'MIROC5_r1i1p1', u'MRI-CGCM3_r1i1p1',
    u'MRI-ESM1_r1i1p1',
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.ndimage as ndimage
from shapely.geometry.polygon import Polygon
from shapely.geometry import Point
import matplotlib.ticker as mticker
import cartopy
import cartopy.crs as ccrs

os.chdir('/Users/peterpfleiderer/Projects/tropical_cyclones/')
sys.path.append('/Users/peterpfleiderer/Projects/tropical_cyclones/TC_scripts')
import TC_support; TC_support = reload(TC_support)

if 'TC' not in globals():
    TC = da.read_nc('data/Allstorms.ibtracs_all.v03r10.nc')
    tc_sel = TC.ix[np.where(TC['basin'][:, 0] == 0)[0]]
    tc_sel = tc_sel.ix[tc_sel['season'] >= 1979]
    tc_lat = tc_sel['lat_for_mapping']
    tc_lon = tc_sel['lon_for_mapping']
    tc_lon[tc_lon < 0] += 360
    tc_wind = tc_sel['source_wind']
    tc_wind[np.isnan(tc_wind)] = -999
    tc_sel_cat = np.array(TC_support.tc_cat(
        np.nanmin(tc_sel['source_pres'], axis=(1, 2)), 'pressure'))
    tc_sel = tc_sel.ix[np.where(tc_sel_cat > 0)]

nc = da.read_nc('data/CAR25/item16222_6hrly_inst/item16222_6hrly_inst_p014_2017-06_2017-10.nc')
lats = nc['global_latitude1'].values
lons = nc['global_longitude1'].values
    '-o', help="overwrite output files", action="store_true")
args = parser.parse_args()
if args.overwrite:
    overwrite = True
else:
    overwrite = False

identifiers = [nn.split('_')[-3] for nn in
               glob.glob('../data/WAH/batch_755/region/item16222_6hrly_inst/*')]

for style in ['contours']:
    if os.path.isfile('detection/ATL/ATL_all_tracks_' + style + '.nc') == False or overwrite:
        # check for duplicates
        all_tracks = {}
        for identifier in identifiers:
            tmp = da.read_nc('detection/ATL/' + str(identifier) +
                             '/track_info_' + style + '.nc')
            for id_, track in tmp.items():
                if id_ not in ['z', 'time']:
                    all_tracks[id_] = track
        all_tracks = da.Dataset({'all_tracks': all_tracks})
        all_tracks.write_nc('detection/ATL/ATL_all_tracks_' + style + '.nc',
                            mode='w')
def ncload(ncfile, variables=None, bbox=None, maxshape=None,
           map_var_names=None, map_dim_names=None,
           time_idx=None, time_dim='time',
           inverted_y_axis=False, dataroot=None,
           x=None, y=None, xdim='x', ydim='y'):
    """Standard ncload for netCDF files

    Parameters
    ----------
    map_var_names : None or dict-like
        make standard variables and actual file variable names match
    map_dim_names : None or dict-like
        make standard dimensions and actual file dimension names match
    inverted_y_axis : bool
        deal with the case where the y axis is inverted (Rignot and Mouginot, Morlighem...)
    time_idx, time_dim : can be provided to extract a time slice
    dataroot : provide an alternative root path for datasets
    x, y : array-like
        provide coordinates directly, when not present in file
    """
    ncfile = get_datafile(ncfile, dataroot)
    variables, _variable = check_variables(variables)

    # determine the variables to load
    if map_var_names is not None:
        map_var_names = map_var_names.copy()  # because of pop
        ncvariables = [map_var_names.pop(nm, nm) for nm in variables]
    else:
        ncvariables = variables

    if map_dim_names is not None:
        xnm = map_dim_names[xdim]
        ynm = map_dim_names[ydim]
    else:
        xnm = xdim
        ynm = ydim

    # open the netCDF dataset
    nc_ds = nc.Dataset(ncfile)

    external_axes = x is not None or y is not None
    if x is None:
        x = nc_ds.variables[xnm]
    if y is None:
        y = nc_ds.variables[ynm]

    # determine the indices to extract
    slice_x, slice_y = get_slices_xy(xy=(x, y), bbox=bbox, maxshape=maxshape,
                                     inverted_y_axis=inverted_y_axis)

    indices = {xnm: slice_x, ynm: slice_y}
    if time_idx is not None:
        indices[time_dim] = time_idx

    # load the data using dimarray (which also copies attributes etc...)
    data = da.read_nc(nc_ds, ncvariables, indices=indices, indexing='position')

    # close dataset
    nc_ds.close()

    # in case axes were provided externally, just replace the values
    if external_axes:
        data.axes[xnm][:] = x[slice_x]
        data.axes[ynm][:] = y[slice_y]

    # rename dimensions appropriately and set metadata
    if map_dim_names is not None:
        if data.dims == (xnm, ynm):
            data.dims = (xdim, ydim)
        elif data.dims == (ynm, xnm):
            data.dims = (ydim, xdim)
        # unknown case? do nothing

    # rename variable names
    if map_var_names is not None:
        data.rename_keys({ncvar: var for var, ncvar in zip(variables, ncvariables)},
                         inplace=True)

    # only one variable
    if _variable is not None:
        data = data[_variable]

    return data
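# Hedged usage sketch for ncload: the file name, variable, and dimension
# mapping below are hypothetical; map_dim_names translates the standard
# 'x'/'y' dimensions to the names actually used in the file.
vel = ncload('velocity.nc', ['surfvelmag'],
             map_dim_names={'x': 'x1', 'y': 'y1'})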
def setup_class(cls):
    cls.ds = da.read_nc(cls.ncfile)
#encoding:utf-8

# Created by V. Noel [LMD/CNRS] on 2014-07-09

import glob
import numpy as np
from datetime import datetime
import dimarray as da

d = datetime(2008, 9, 2)
mask = 'out/%04d%02d/vcm_%04d-%02d-%02d*.nc4' % (d.year, d.month,
                                                 d.year, d.month, d.day)
print mask
files = glob.glob(mask)
for f in files:
    print 'Looking in', f
    d = da.read_nc(f)
    lat = d['lat']
    idxlat = lat.values < -60
    # idxlat = (lat.values > -30) & (lat.values < 30)
    alt = d['cal333'].altitude
    idxalt = alt > 15
    for var in 'cal333', 'cal05', 'cal20', 'cal80', 'csat':
        cm = d[var].values
        cm = cm[idxlat, :]
        cm = cm[:, idxalt]
        if np.sum(cm) > 0:
            print 'high polar clouds found in ', var, np.sum(cm)
# use tmpdir fixture
def test_ncio(tmpdir):
    fname = tmpdir.join("test.nc").strpath  # have test.nc in some temporary directory
    a = DimArray([1, 2], dims='xx0')
    b = DimArray([3, 4, 5], dims='xx1')
    a.write_nc(fname, "a", mode='w')
    b.write_nc(fname, "b", mode='a')
    try:
        b.write_nc(fname.replace('.nc', 'netcdf3.nc'), "b", mode='w',
                   format='NETCDF3_CLASSIC')
    except Exception, msg:
        warn("writing as NETCDF3_CLASSIC failed (known bug on 64bits systems): {msg}".format(msg=repr(msg)))
    data = read_nc(fname)
    assert(np.all(data['a'] == a))
    assert(np.all(data['b'] == b))
    ds = da.Dataset(a=a, b=b)
    for k in ds:
        assert(np.all(ds[k] == data[k]))


def main(**kw):
    # test_ncio now needs a tmpdir; build a py.path.local stand-in for the
    # pytest fixture (the py package is assumed to be installed)
    import tempfile
    import py
    try:
        test_ncio(py.path.local(tempfile.mkdtemp()))
    except RuntimeError, msg:
        warn("NetCDF test failed: {}".format(msg))


if __name__ == "__main__":
    main()
def main(input="test.out/200801/vcm_2008-01-01T01-30-23ZN.nc4"):
    fig = plt.figure(figsize=[12, 12])
    vcm = da.read_nc(input)
    plot_vcms(vcm)
    plt.show()
def test_debug_doc():
    direc = da.get_datadir()
    temp = da.read_nc(direc + '/cmip5.*.nc', 'temp', align=True, axis='model')