def timeline_trend_count_SA():
    msg_folder = cnst.GRIDSAT
    fname = 'aggs/gridsat_WA_-65_monthly_count_-40base_1000km2.nc'
    fname2 = 'aggs/gridsat_WA_-40_monthly_count_-40base_1000km2.nc'

    da = xr.open_dataarray(msg_folder + fname)
    da2 = xr.open_dataarray(msg_folder + fname2)

    # [25, 33, -28, -10], West [15, 25, -26, -18]
    da = da.sel(lat=slice(-25, -18), lon=slice(18, 22))    # (lat=slice(-28,-10), lon=slice(25, 33))
    da2 = da2.sel(lat=slice(-25, -18), lon=slice(18, 22))  # [25, 33, -28, -10]
    # da = da.sel(lat=slice(5, 10))
    # da[da == 0] = np.nan

    mean = da.mean(dim=['lat', 'lon'])
    mean2 = da2.mean(dim=['lat', 'lon'])
    # mean = mean[(mean['time.month'] == 8)]

    f = plt.figure(figsize=(10, 6))
    for i in [12, 1]:
        bla = mean[(mean['time.month'] == i)]
        bla.plot(label=str(i), marker='o')
    plt.title('Average number of pixels <= -70C, SouthA 10-28S, 25-35E')

    f = plt.figure(figsize=(10, 6))
    for i in [12, 1]:
        bla2 = mean2[(mean2['time.month'] == i)]
        bla2.plot(label=str(i), marker='o')
    plt.title('Average number of pixels <= -40C, SouthA 10-28S, 25-35E')
    plt.legend()
def run(params): start_time = datetime.now() bin_width, filter_bandwidth, theta, shift, signal_field = params # Get file paths signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) output_dir = '/scratch/pkittiwi/fg1p/stats_semi/signal/bin{:.2f}/' \ 'fbw{:.2f}/theta{:.1f}/shift{:d}' \ .format(bin_width, filter_bandwidth, theta, shift) signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc'\ .format(signal_dir, bin_width, filter_bandwidth, theta, shift, signal_field) output_file = '{:s}/stats_semi_signal_bin{:.2f}_fbw{:.2f}_' \ 'theta{:.1f}_shift{:d}_{:03d}.nc' \ .format(output_dir, bin_width, filter_bandwidth, theta, shift, signal_field) mask_file = '/scratch/pkittiwi/fg1p/hera331_fov_mask.nc' # Load data to memory and align coordinates with xr.open_dataarray(signal_file) as da: signal = da.load() with xr.open_dataarray(mask_file) as da: mask = da.load() # Load one noise file to get coordinates. noise = xr.open_dataarray( '/scratch/pkittiwi/fg1p/noise_map/bin0.08/fbw8.00/theta90.0/shift0/' 'noise_map_bin0.08_fbw8.00_theta90.0_shift0_333.nc' ) for key, values in noise.coords.items(): signal.coords[key] = values mask.coords[key] = values signal, noise, mask = xr.align(signal, noise, mask) # Mask observation signal = signal.where(mask == 1) # Calculate statistic out = get_stats(signal) out.attrs = {'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth, 'theta': theta, 'shift': shift} os.makedirs(output_dir, exist_ok=True) out.to_netcdf(output_file) out.close() print('Finish. signal_file = {:s}. output_file = {:s}. ' 'Time spent {:.5f} sec.' .format(signal_file, output_file, (datetime.now() - start_time).total_seconds()))
def __init__(self, instrument, ref_pb=None): """ Parameters ---------- :param instrument : Instrument Instrument configuration :param ref_pb : str, optional name of the reference passband """ super().__init__(instrument, ref_pb) I = self.instrument self._spectra = sp = pd.read_hdf(resource_filename(__name__, join("data", "spectra.h5")), 'Z0') self._tr_table = trt = xa.open_dataarray(resource_filename(__name__, join("data", "transmission.nc"))) self._tr_mean = trm = trt.mean(['airmass', 'pwv']) self.extinction = interp1d(trm.wavelength, trm, bounds_error=False, fill_value=0.0) self.wl = wl = sp.index.values self.lte = lte = sp.columns.values self._apply_extinction = True # Dataframe indices # ----------------- self.ipb = pd.Index(self.instrument.pb_names, name='passband') self.iteff = pd.Index(lte, name='teff') # Per-passband fluxes # ------------------- self._compute_relative_flux_tables(0, ref_pb)
def saveDailyBlobs():
    """
    Converts hourly centre-point convective-core files to daily netcdf files
    so they can be saved with LSTA daily data
    :return:
    """
    msgfile = '/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant.nc'
    msg = xr.open_dataarray(msgfile)

    # def first_nozero(array_like, axis):
    #     array_like[array_like < 16] = array_like[array_like < 16] + 24
    #     return np.nanmin(array_like, axis=axis)

    msg.values[msg.values > 75] = np.nan
    msg.values[msg.values == 0] = np.nan

    for m in msg:
        if m['time.hour'].values >= 16:
            m.values[m > 0] = m['time.hour'].values
        else:
            m.values[m > 0] = m['time.hour'].values + 24

    # This is useful: it removes all pixels which got rain twice on a day.
    md = msg.resample('24H', base=16, dim='time', skipna=True, how='min')
    md = md[(md['time.month'] >= 6) & (md['time.month'] <= 9)]
    md.values[md.values > 23] = md.values[md.values > 23] - 24

    md.to_netcdf('/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant_daily.nc')
def setUp(self):
    file = os.path.join(BASE_PATH, 'model',
                        'GFS_Global_0p25deg_20161219_0600.nc')
    ds = SpatialDataset(NetCDFHandler(file),)
    self.array = xr.open_dataarray(file)
    self.grid = ds.get_grid(
        'Maximum_temperature_height_above_ground_Mixed_intervals_Maximum',
        data_array=self.array)
def composite(h):
    pool = multiprocessing.Pool(processes=8)

    file = '/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant.nc'
    msg = xr.open_dataarray(file)
    msg = msg[(msg['time.hour'] == 17) & (msg['time.minute'] == 0) &
              (msg['time.year'] >= 2006) & (msg['time.year'] <= 2009) &
              (msg['time.month'] >= 6)]

    msg = msg.sel(lat=slice(10.5, 17.5), lon=slice(-9.5, 9.5))

    res = pool.map(file_loop, msg)
    pool.close()

    # res = []
    # for m in msg[0:50]:
    #     r = file_loop(m)
    #     res.append(r)

    res = [x for x in res if x is not None]

    scales = res
    scales = [item for sublist in scales for item in sublist]  # flatten list of lists
    scales = np.concatenate(scales)

    return scales
def test_save_saves_also_grid(self):
    self.array.pp.grid = self.grid
    self.array.pp.save('test.nc')
    opened_array = xr.open_dataarray('test.nc')
    grid_attrs = {attr[7:]: opened_array.attrs[attr]
                  for attr in opened_array.attrs if attr[:7] == 'ppgrid_'}
    opened_grid = GridBuilder(grid_attrs).build_grid()
    self.assertEqual(self.grid, opened_grid)
def run_rebuild_iter_1mhz(args):
    process = multiprocessing.current_process().pid
    inf, ouf = args
    print('pid: {:d} ; input file: {:s} ; output file: {:s}'
          .format(process, inf, ouf))
    w_in = xr.open_dataarray(inf)
    w_out = rebuild_iter_1mhz(w_in)
    w_out.to_netcdf(ouf)
def load(cls, name, **kwargs):
    Bx = xr.open_dataarray(name, **kwargs)
    spl = cls(knots=Bx.knots, order=Bx.order, bc=Bx.bc, dim=Bx.dim)
    spl._coef = Bx
    return spl
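# Hedged usage sketch for the `load` classmethod above. The class name
# `Spline` and the file name are assumptions for illustration only; the
# original snippet shows just the loader, which rebuilds the object from the
# attributes stored on the saved coefficient DataArray.
# spl = Spline.load('spline_coefficients.nc')
# coef = spl._coef  # the coefficients stay attached as the opened DataArray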
def test_load_removes_grid_attrs(self):
    self.array.pp.grid = self.grid
    self.array.pp.save('test.nc')
    non_gridded_array = xr.open_dataarray('test.nc')
    non_gridded_attrs = [attr for attr in non_gridded_array.attrs
                         if attr[:7] == 'ppgrid_']
    self.assertTrue(non_gridded_attrs)
    gridded_array = xr.DataArray.pp.load('test.nc')
    gridded_attrs = [attr for attr in gridded_array.attrs
                     if attr[:7] == 'ppgrid_']
    self.assertFalse(gridded_attrs)
def regrid_simpler(cmorph):
    dummy = xr.open_dataset(constants.LSTA_TESTFILE)
    cm = xr.open_dataarray(cmorph)
    out = cmorph.replace('WA_', 'WA_onLSTA_')

    cm_on_lst = dummy.salem.transform(cm)

    enc = {'pr': {'complevel': 5, 'zlib': True}}
    cm_on_lst.to_netcdf(out, encoding=enc, format='NETCDF4')
def run(binnum): process = current_process().pid print('... P{:d}: applying filter {:s}' .format(process, filter_files[binnum].split('/')[-1])) filter_da = xr.open_dataarray(filter_files[binnum]) filter_array = filter_da.values data_channels = filter_da.attrs['frequency_channels'] filter_bandwidth = filter_da.attrs['filter_bandwidth'] # Figure out FFT and filter normalization # FFT normalization factor x = filter_da.attrs['x'] y = filter_da.attrs['y'] f = filter_da.attrs['f'] dx = x[1] - x[0] dy = y[1] - y[0] df = f[1] - f[0] u = filter_da.attrs['u'] v = filter_da.attrs['v'] e = filter_da.attrs['e'] du = u[1] - u[0] dv = v[1] - v[0] de = e[1] - e[0] fft_norm = dx * dy * df ifft_norm = du * dv * de * filter_array.size # Filter normalization factor filter_volume = np.sum(filter_array.size * du * dv * de) filter_integral = np.sum(np.abs(filter_array) ** 2 * du * dv * de) filter_norm = np.sqrt(filter_volume / filter_integral) # Apply filter filtered_data = apply_filter( data_array[data_channels], filter_array, fft_multiplier=fft_norm, ifft_multiplier=ifft_norm, output_multiplier=filter_norm, apply_window_func=args.apply_window_func, invert_filter=False ).real out_da_attrs = filter_da.attrs out_da_attrs.pop('x') out_da_attrs.pop('y') out_da_attrs.pop('f') out_da_attrs['kx'] = filter_da.kx.values out_da_attrs['ky'] = filter_da.ky.values out_da_attrs['kz'] = filter_da.kz.values out_da = xr.DataArray( filtered_data, dims=['f', 'y', 'x'], coords={'f': f, 'y': y, 'x': x}, attrs=out_da_attrs ) outfile = '{:s}/signal_cube_filtered_fbw{:.2f}MHz_{:03d}_bin{:03d}.nc'\ .format(args.output_directory, filter_bandwidth / 1e6, field_num, binnum) out_da.to_netcdf(outfile)
def composite(): pool = multiprocessing.Pool(processes=4) file = constants.MCS_POINTS_DOM msg = xr.open_dataarray(file) msg = msg[ (msg['time.minute'] == 0) & ( msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6)] #(msg['time.hour'] >= 17) & msg = msg.sel(lat=slice(10.2, 17), lon=slice(-9.5, 9.5)) res = pool.map(file_loop, msg) pool.close() # for m in msg[0:10]: # file_loop(m) # # return res = [x for x in res if x is not None] cell = [] surface = [] hour = [] for r in res: cell.append(r[0]) surface.append(r[1]) hour.append(r[2]) pdb.set_trace() cell = [item for sublist in cell for item in sublist] # flatten list of lists surface = [item for sublist in surface for item in sublist] # flatten list of lists hour = [item for sublist in hour for item in sublist] # flatten list of lists cell = np.array(cell, dtype=float) cell = cell[np.isfinite(surface)] surface = np.array(surface, dtype=float) surface = surface[np.isfinite(surface)] hour = np.array(hour, dtype=float) hour = hour[np.isfinite(surface)] dic = {'cell': cell, 'surface': surface, 'hour' : hour } pkl.dump(dic, open("/users/global/cornkle/figs/LSTA-bullshit/scales/new/dominant_scales_save/scatter_scales.p", "wb")) print('Successfully written scatter_scales save file')
def blobs():
    # file = '/users/global/cornkle/MCSfiles/blob_map_30km_-67_JJAS_points.nc'
    file = '/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant.nc'
    fpath = '/users/global/cornkle/data/pythonWorkspace/proj_CEH/topo/gtopo_1min_afr.nc'

    msg = xr.open_dataarray(file)
    msg = msg.sel(lat=slice(10, 20), lon=slice(-10, 10))
    msg = msg[(msg['time.month'] >= 6)]

    msg = msg.where(msg > 6)
    msg.values[msg.values > 6] = 1
    msg = msg.sum(dim='time')

    map = msg.salem.get_map(cmap='viridis')
    top = xr.open_dataarray(fpath)

    f = plt.figure()
    z = map.set_topography(top, relief_factor=1.4)
    map.set_contour(z, levels=(200, 400, 600, 800), cmap='Reds')
    map.set_data(msg)
    map.visualize(title='Blobs and topo')

    msg = msg.sum(dim='lon')
    f = plt.figure()
    msg.plot()
def get_previous_hours_msg(date, ehour, refhour): # tdic = {18 : ('36 hours', '15 hours'), # 19 : ('37 hours', '16 hours'), # 20: ('38 hours', '17 hours'), # 21: ('39 hours', '18 hours'), # 22: ('40 hours', '19 hours'), # 23: ('41 hours', '20 hours'), # 0: ('42 hours', '21 hours'), # 3: ('45 hours', '24 hours'), # 6: ('48 hours', '27 hours')} # before = pd.Timedelta(tdic[date.hour][0]) # before2 = pd.Timedelta(tdic[date.hour][1]) date = date.replace(hour=refhour) if ehour > 0: edate = date + pd.Timedelta(str(ehour) + ' hours') else: edate = date - pd.Timedelta(str(np.abs(ehour)) + ' hours') #edate = edate.replace(hour=ehour) t1 = edate - pd.Timedelta('1 hours') t2 = edate + pd.Timedelta('1 hours') file = cnst.MCS_15K# MCS_15K #_POINTS_DOM msg = xr.open_dataarray(file) try: msg = msg.sel(time=slice(t1.strftime("%Y-%m-%dT%H"), t2.strftime("%Y-%m-%dT%H"))) except OverflowError: return None #print(prev_time.strftime("%Y-%m-%dT%H"), date.strftime("%Y-%m-%dT%H")) pos = np.where((msg.values <= -40) ) #(msg.values >= 5) & (msg.values < 65)) # # out = np.zeros_like(msg) out[pos] = 1 out = np.sum(out, axis=0) out[out>0]=1 # if np.sum(out>1) != 0: # 'Stop!!!' # pdb.set_trace() msg = msg.sum(axis=0)*0 xout = msg.copy() xout.name = 'probs' xout.values = out return xout
def create_ancils(): dummy = xr.open_dataarray(dummy_grid) ds = xr.Dataset(attrs=dummy.attrs) dummy = dummy.isel(grid_longitude_t=slice(box[0], box[1]), grid_latitude_t=slice(box[2], box[3])) files = glob.glob(ancils+'*.nc') files for f in files: varsdat = xr.open_dataset(f, decode_times=False) if 'pseudo' in varsdat.keys(): varsdat = varsdat.isel(rlon=slice(box[0], box[1]), rlat=slice(box[2], box[3])) data = varsdat['field1391'].values[0, 0, :,:].squeeze() # time, plant type, y, x if 'past' in f: var = 'veg_past' elif 'current' in f: var = 'veg_current' else: print('Ancils not found') return ds[var] = xr.DataArray(data, coords={'false_latitude': dummy.grid_latitude_t.values, 'false_longitude': dummy.grid_longitude_t.values, 'true_latitude': ( ['false_latitude', 'false_longitude'], dummy.latitude_t.values), 'true_longitude': ( ['false_latitude', 'false_longitude'], dummy.longitude_t.values)}, dims=['false_latitude', 'false_longitude']) if 'ht' in varsdat.keys(): varsdat = varsdat.isel(rlon=slice(box[0], box[1]), rlat=slice(box[2], box[3])) data = varsdat['ht'].values[0, 0, :, :].squeeze() # time, plant type, y, x ds['topo'] = xr.DataArray(data, coords={'false_latitude': dummy.grid_latitude_t.values, 'false_longitude': dummy.grid_longitude_t.values, 'true_latitude': ( ['false_latitude', 'false_longitude'], dummy.latitude_t.values), 'true_longitude': ( ['false_latitude', 'false_longitude'], dummy.longitude_t.values)}, dims=['false_latitude', 'false_longitude']) ds.to_netcdf(out+'ancils/ancils_vera.nc')
def t_trend_slice(): #file = '/users/global/cornkle/data/ERA-I monthly/ERA-WA-Monthly-2mTemp.nc' file = '/localscratch/wllf030/cornkle/ERA-I/monthly/old/ERA-Int-Monthly-2mTemp.nc' fpath = '/users/global/cornkle/figs/CLOVER/months/' dam = xr.open_dataarray(file) lower = 9 higher = 11 da = dam[(dam['time.month']>=lower) & (dam['time.month']<=higher)] da = da.sel(longitude=slice(-18,51), latitude=slice(36, -37)) da = da.groupby('time.year').mean(axis=0) lons = da.longitude lats = np.flip(da.latitude.values, axis=0) # define a function to compute a linear trend of a timeseries def linear_trend(x): #pf = np.polyfit(np.arange(len(x)), x, 1) pf, slope, int, p, ind = mk.test(np.arange(len(x)),x.squeeze().values, eps=0.001, alpha=0.01, Ha='upordown') # we need to return a dataarray or else xarray's groupby won't be happy if ind == 1: issig = slope else: issig = np.nan return xr.DataArray(issig, ) # stack lat and lon into a single dimension called allpoints stacked = da.stack(allpoints=['latitude','longitude']) # apply the function over allpoints to calculate the trend at each point trend = stacked.groupby('allpoints').apply(linear_trend) # unstack back to lat lon coordinates trend_unstacked = trend.unstack('allpoints') trend_unstacked = trend_unstacked*10. # warming over decade da2 = xr.DataArray(trend_unstacked, coords=[lats, lons], dims=['latitude', 'longitude']) fp = fpath + 'ttrend_'+str(lower).zfill(2)+'-'+str(higher).zfill(2)+'.png' up.quick_map_salem(da2, vmin=-0.4, vmax=0.4, cmap='RdBu_r', save=fp) # plt.close('all')
def timeline_trend_count():
    msg_folder = cnst.GRIDSAT
    fname = 'aggs/gridsat_WA_-70_monthly_count_-40base_1000km2.nc'

    da = xr.open_dataarray(msg_folder + fname)
    da = da.sel(lat=slice(4.5, 8), lon=slice(-10, 15))
    # da = da.sel(lat=slice(5, 10))
    # da[da == 0] = np.nan
    mean = da.mean(dim=['lat', 'lon'])
    # mean = mean[(mean['time.month'] == 8)]

    f = plt.figure(figsize=(10, 6))
    for i in range(3, 6):
        bla = mean[(mean['time.month'] == i)]
        bla.plot(label=str(i), marker='o')

    plt.title('Average number of pixels <= -70C, 4.5-8N')
    plt.legend()
def timeline_trend_mean():
    msg_folder = '/users/global/cornkle/data/OBS/gridsat/gridsat_netcdf/'
    fname = 'gridsat_WA_-70_monthly.nc'

    da = xr.open_dataarray(msg_folder + fname)
    da = da.sel(lat=slice(5, 7), lon=slice(-17, 20))
    da[da == 0] = np.nan
    mean = da.mean(dim=['lat', 'lon'])
    # mean = mean[(mean['time.month'] == 8)]

    f = plt.figure(figsize=(10, 6))
    for i in range(4, 6):
        bla = mean[(mean['time.month'] == i)]
        bla.plot(label=str(i), marker='o')

    plt.title('Monthly mean temperature of pixels <= -40C, 11-18N')
    plt.legend()
    plt.ylim(-78, -71)
def run(fieldnum): unmask_cube = fits.getdata( '/data6/piyanat/projects/fg1p/mc_cubes_heraxx/hera331/' 'hera331_mc_cube_p{:03d}.fits'.format(fieldnum) ) input_dir = '/data6/piyanat/projects/fg1p/masked_cubes_heraxx/m1/' \ '{:d}MHz/p{:03d}'.format(args.bw, fieldnum) s_arr0 = np.empty((4, nbins, 2, 50)) s_arr1 = np.empty((4, nbins, 2, 50)) s_f0 = np.empty(nbins) for binnum in range(nbins): data_da = xr.open_dataarray( '{:s}/masked_cube_hera331_p{:03d}_m1_bw{:d}MHz_bin{:02d}_fpad.nc' .format(input_dir, fieldnum, args.bw, binnum) ) fov_window = get_fov_window(data_da) f_window = get_f_window(data_da) full_mask = fov_window[None, :, :] * f_window[:, None, None] f_mask = np.ones_like(full_mask, dtype=bool) * f_window[:, None, None] fov_mask = np.ones_like(full_mask, dtype=bool) * fov_window[None, :, :] s_arr1[0, binnum] = cal_pdf(data_da.values.ravel()) s_arr1[1, binnum] = cal_pdf(data_da.values[f_mask].ravel()) s_arr1[2, binnum] = cal_pdf(data_da.values[fov_mask].ravel()) s_arr1[3, binnum] = cal_pdf(data_da.values[full_mask].ravel()) ch_cut = slice(0 + (binnum * nf), (nf * 2) + (binnum * nf)) unmask_data = unmask_cube[::-1][ch_cut][::-1] s_arr0[0, binnum] = cal_pdf(unmask_data.ravel()) s_arr0[1, binnum] = cal_pdf(unmask_data[f_mask].ravel()) s_arr0[2, binnum] = cal_pdf(unmask_data[fov_mask].ravel()) s_arr0[3, binnum] = cal_pdf(unmask_data[full_mask].ravel()) s_f0[binnum] = data_da.f0.values s_ds = xr.Dataset( {'original': (['cut', 'f0', 'val', 'val_bin'], s_arr0), 'fg_masked': (['cut', 'f0', 'val', 'val_bin'], s_arr1)}, coords={'cut': np.array(['none', 'freq', 'fov', 'all']), 'f0': s_f0, 'val': ['pdf', 'bin_center'], 'val_bin': np.arange(50)} ) s_ds.to_netcdf('/data6/piyanat/projects/fg1p/stats/' '{:d}MHz/pdf_hera331_masked_cube_p{:03d}_bw{:d}MHz.nc' .format(args.bw, fieldnum, args.bw))
def get_previous_hours(date):

    tdic = {18: ('36 hours', '15 hours'),
            19: ('37 hours', '16 hours'),
            20: ('38 hours', '17 hours'),
            21: ('39 hours', '18 hours'),
            22: ('40 hours', '19 hours'),
            23: ('41 hours', '20 hours'),
            0: ('42 hours', '21 hours'),
            3: ('45 hours', '24 hours'),
            6: ('48 hours', '27 hours')}

    before = pd.Timedelta(tdic[date.hour][0])
    before2 = pd.Timedelta(tdic[date.hour][1])
    # before2 = pd.Timedelta('15 minutes')

    t1 = date - before
    t2 = date - before2

    file = constants.MCS_15K  # MCS_15K  # _POINTS_DOM
    msg = xr.open_dataarray(file)
    try:
        msg = msg.sel(time=slice(t1.strftime("%Y-%m-%dT%H"), t2.strftime("%Y-%m-%dT%H")))
    except OverflowError:
        return None
    # print(prev_time.strftime("%Y-%m-%dT%H"), date.strftime("%Y-%m-%dT%H"))

    pos = np.where(msg.values <= -70)  # (msg.values >= 5) & (msg.values < 65)

    out = np.zeros_like(msg)
    out[pos] = 1
    out = np.sum(out, axis=0)
    out[out > 0] = 1

    if np.sum(out > 1) != 0:
        print('Stop!!!')
        pdb.set_trace()

    msg = msg.sum(axis=0) * 0
    xout = msg.copy()
    xout.name = 'probs'
    xout.values = out

    return xout
def composite(h):
    # pool = multiprocessing.Pool(processes=8)

    file = constants.MCS_CENTRE70  # MCS_POINTS_DOM
    hour = h

    msg = xr.open_dataarray(file)
    msg = msg[(msg['time.hour'] >= 18) & (msg['time.hour'] <= 21) & (msg['time.minute'] == 0) &
              (msg['time.year'] >= 2008) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6)]

    msg = msg.sel(lat=slice(10.9, 19.5), lon=slice(-9.8, 9.8))

    dic = u_parallelise.run_arrays(7, file_loop, msg[0:50],
                                   ['ano', 'regional', 'cnt', 'prob', 'pcnt'])  # 'rano', 'rregional', 'rcnt'

    for k in dic.keys():
        dic[k] = np.nansum(dic[k], axis=0)

    pkl.dump(dic, open("/users/global/cornkle/figs/LSTA-bullshit/corrected_LSTA/system_scale/"
                       "composite_backtrack_" + str(hour).zfill(2) + ".p", "wb"))
def t_mean():
    # file = '/users/global/cornkle/data/ERA-I monthly/ERA-WA-Monthly-2mTemp.nc'
    file = '/users/global/cornkle/data/ERA-I monthly/ERA-Int-Monthly-2mTemp.nc'
    fpath = '/users/global/cornkle/figs/gap_filling_Tgrad/months/'

    dam = xr.open_dataarray(file)
    months = np.arange(1, 13)

    for m in months:
        da = dam[(dam['time.month'] == m)]
        da = da.sel(longitude=slice(-18, 51), latitude=slice(36, -37))
        da = da.mean(axis=0) - 273.15

        fp = fpath + 'tmean_' + str(m).zfill(2) + '.png'
        up.quick_map_salem(da, levels=np.arange(20, 41, 2), cmap='jet', save=fp)
def composite(h, eh):
    # pool = multiprocessing.Pool(processes=8)

    file = cnst.MCS_CENTRE70
    hour = h

    msg = xr.open_dataarray(file)
    msg = msg[(msg['time.hour'] == hour) & (msg['time.minute'] == 0) &
              (msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6)]

    msg = msg.sel(lat=slice(10.9, 19), lon=slice(-9.8, 9.8))
    msg.attrs['eh'] = eh
    msg.attrs['refhour'] = h

    dic = {}
    # ipdb.set_trace()
    for ids in range(0, len(msg), 50):
        dic = u_parallelise.era_run_arrays(1, file_loop, msg[ids:ids + 50], dic)  # 'rano', 'rregional', 'rcnt'

    print('Returned from parallel')

    # Sequential alternative: run file_loop per slice and reduce into dic.
    # res = []
    # for mm in msg:
    #     out = file_loop(mm)
    #     res.append(out)
    #
    # res = [x for x in res if x is not None]
    # rres = []
    # dic_names = (res[0])[1]
    # for r in res:
    #     rres.append(np.array(r[0]))
    # vars = np.array(rres)
    # for id, l in enumerate(dic_names):
    #     dic[l] = np.nansum(np.squeeze(vars[:, id, ...]), axis=0)

    # for k in dic.keys():
    #     dic[k] = np.nansum(dic[k], axis=0)

    pkl.dump(dic, open(cnst.network_data + "figs/LSTA-bullshit/AGU/composite_backtrack" + str(eh) +
                       "UTCERA" + str(hour).zfill(2) + ".p", "wb"))
    print('Dumped file')
def composite(h, eh):
    # pool = multiprocessing.Pool(processes=8)

    file = cnst.MCS_CENTRE70
    hour = h

    msg = xr.open_dataarray(file)
    msg = msg[(msg['time.hour'] == hour) & (msg['time.minute'] == 0) &
              (msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6)]

    msg = msg.sel(lat=slice(10.9, 19), lon=slice(-9.8, 9.8))
    msg.attrs['eh'] = eh
    msg.attrs['refhour'] = h

    dic = OrderedDict()
    # Pre-register the expected output keys.
    for sk in ['lsta', 'cnt', 'cntp', 'cntm', 'probmsg']:
        dic[sk] = None

    res = []
    for mm in msg:
        out = file_loop(mm)
        res.append(out)
    print('Returned from parallel')

    res = [x for x in res if x is not None]
    rres = []
    dic_names = (res[0])[1]

    for r in res:
        rres.append(np.array(r[0]))
    vars = np.array(rres)

    for id, l in enumerate(dic_names):
        dic[l] = np.nansum(np.squeeze(vars[:, id, ...]), axis=0)

    # for k in dic.keys():
    #     dic[k] = np.nansum(dic[k], axis=0)

    pkl.dump(dic, open(cnst.network_data + "figs/LSTA-bullshit/AGU/composite_backtrack" + str(eh) +
                       "UTCERA" + str(hour).zfill(2) + ".p", "wb"))
    print('Dumped file')
def do_mask(i): process = current_process().pid if args.verbose: print('... P{:d}: applying filter {:s}' .format(process, filter_files[i].split('/')[-1])) filter_da = xr.open_dataarray(filter_files[i]) filter_array = filter_da.values data_channels = filter_da.attrs['frequency_channels'] image_channel = int(np.floor(filter_da.shape[0] / 2)) # Figure out FFT and filter normalization # FFT normalization factor x = filter_da.attrs['x'] y = filter_da.attrs['y'] f = filter_da.attrs['f'] dx = x[1] - x[0] dy = y[1] - y[0] df = f[1] - f[0] u = filter_da.attrs['u'] v = filter_da.attrs['v'] e = filter_da.attrs['e'] du = u[1] - u[0] dv = v[1] - v[0] de = e[1] - e[0] fft_norm = dx * dy * df ifft_norm = du * dv * de * filter_array.size # Filter normalization factor filter_volume = np.sum(filter_array.size * du * dv * de) filter_integral = np.sum(np.abs(filter_array) ** 2 * du * dv * de) filter_norm = np.sqrt(filter_volume / filter_integral) # Apply filter filtered_data = apply_filter( data_array[data_channels], filter_array, fft_multiplier=fft_norm, ifft_multiplier=ifft_norm, output_multiplier=filter_norm, apply_window_func=args.apply_window_func, invert_filter=False ).real # Select and store the center channel of the filtered data array filtered_data_array[data_channels[image_channel]] = \ filtered_data[image_channel]
def regrid(cmorph):
    dummy = xr.open_dataset(constants.LSTA_TESTFILE)
    cm = xr.open_dataarray(cmorph)
    out = cmorph.replace('WA_', 'WA_onLSTA_')

    arrays = []
    for c in cm:
        c_on_lsta = dummy.salem.transform(c)
        arrays.append(c_on_lsta)

    astack = np.stack(arrays, axis=0)

    da = xr.DataArray(astack,
                      coords={'time': cm.time, 'lat': dummy.lat, 'lon': dummy.lon},
                      dims=['time', 'lat', 'lon'])  # .isel(time=0)
    da.to_netcdf(out)
def composite(h):
    # pool = multiprocessing.Pool(processes=8)

    file = constants.MCS_CENTRE70
    hour = h

    msg = xr.open_dataarray(file)
    # 17-19 UTC window
    msg = msg[(msg['time.hour'] >= 17) & (msg['time.hour'] <= 19) & (msg['time.minute'] == 0) &
              (msg['time.year'] >= 2008) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6)]

    msg = msg.sel(lat=slice(10.9, 19), lon=slice(-9.8, 9.8))

    dic = u_parallelise.era_run_arrays(5, file_loop, msg)  # 'rano', 'rregional', 'rcnt'

    # for k in dic.keys():
    #     dic[k] = np.nansum(dic[k], axis=0)

    pkl.dump(dic, open("/users/global/cornkle/figs/LSTA-bullshit/corrected_LSTA/system_scale/doug/"
                       "composite_backtrack_ERA_pl_" + str(hour).zfill(2) + ".p", "wb"))
def rinexobs(fn, ofn=None): """ Program overviw: 1) scan the whole file for the header and other information using scan(lines) 2) each epoch is read and the information is put in a 4-D xarray.DataArray 3) rinexobs can also be sped up with if an h5 file is provided, also rinexobs can save the rinex file as an h5. The header will be returned only if specified. rinexobs() returns the data in a 4-D xarray.DataArray, [Parameter,Sat #,time,data/loss of lock/signal strength] """ # open file, get header info, possibly speed up reading data with a premade h5 file fn = Path(fn).expanduser() with fn.open('r') as f: tic = time() lines = f.read().splitlines(True) header, version, headlines, headlength, obstimes, sats, svset = scan(lines) print(fn, 'is a RINEX', version, 'file.', fn.stat().st_size//1000, 'kB.') if fn.suffix == '.nc': data = xarray.open_dataarray(str(fn), group='OBS') elif fn.suffix == '.h5': logging.warning('HDF5 is deprecated in this program, please use NetCDF format') import pandas data = pandas.read_hdf(fn, key='OBS') else: data = processBlocks(lines, header, obstimes, svset, headlines, headlength, sats) print("finished in {:.2f} seconds".format(time()-tic)) # write an h5 file if specified if ofn: ofn = Path(ofn).expanduser() print('saving OBS data to', ofn) if ofn.is_file(): wmode = 'a' else: wmode = 'w' data.to_netcdf(ofn, group='OBS', mode=wmode) return data, header
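# Hedged usage sketch for rinexobs() above; the file names are hypothetical.
# data, header = rinexobs('site0010.17o', ofn='site0010.17o.nc')
# `data` is the 4-D xarray.DataArray described in the docstring:
# [parameter, satellite number, time, data / loss of lock / signal strength].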
def composite(h): pool = multiprocessing.Pool(processes=8) file = '/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant.nc' msg = xr.open_dataarray(file) msg = msg[(msg['time.hour'] == h) & (msg['time.minute'] == 0) & ( msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6) ] msg = msg.sel(lat=slice(10.5,17.5), lon=slice(-9.5,9.5)) res = pool.map(file_loop, msg) pool.close() # for m in msg[0:50]: # file_loop(m) # # return res = [x for x in res if x is not None] blobs = [] scales = [] temp = [] for r in res: scales.append(r[0]) temp.append(r[1]) blobs.append(r[2]) blobs = [item for sublist in blobs for item in sublist] # flatten list of lists scales = [item for sublist in scales for item in sublist] # flatten list of lists temp = [item for sublist in temp for item in sublist] return blobs, scales, temp
def attach_hydro(n, costs, ppl): if 'hydro' not in snakemake.config['renewable']: return c = snakemake.config['renewable']['hydro'] carriers = c.get('carriers', ['ror', 'PHS', 'hydro']) _add_missing_carriers_from_costs(n, costs, carriers) ppl = ppl.query('carrier == "hydro"').reset_index(drop=True)\ .rename(index=lambda s: str(s) + ' hydro') ror = ppl.query('technology == "Run-Of-River"') phs = ppl.query('technology == "Pumped Storage"') hydro = ppl.query('technology == "Reservoir"') country = ppl['bus'].map(n.buses.country).rename("country") inflow_idx = ror.index | hydro.index if not inflow_idx.empty: dist_key = ppl.loc[inflow_idx, 'p_nom'].groupby(country).transform(normed) with xr.open_dataarray(snakemake.input.profile_hydro) as inflow: inflow_countries = pd.Index(country[inflow_idx]) missing_c = (inflow_countries.unique().difference( inflow.indexes['countries'])) assert missing_c.empty, ( f"'{snakemake.input.profile_hydro}' is missing " f"inflow time-series for at least one country: {', '.join(missing_c)}" ) inflow_t = (inflow.sel(countries=inflow_countries).rename({ 'countries': 'name' }).assign_coords(name=inflow_idx).transpose( 'time', 'name').to_pandas().multiply(dist_key, axis=1)) if 'ror' in carriers and not ror.empty: n.madd("Generator", ror.index, carrier='ror', bus=ror['bus'], p_nom=ror['p_nom'], efficiency=costs.at['ror', 'efficiency'], capital_cost=costs.at['ror', 'capital_cost'], weight=ror['p_nom'], p_max_pu=(inflow_t[ror.index].divide( ror['p_nom'], axis=1).where(lambda df: df <= 1., other=1.))) if 'PHS' in carriers and not phs.empty: # fill missing max hours to config value and # assume no natural inflow due to lack of data phs = phs.replace({'max_hours': {0: c['PHS_max_hours']}}) n.madd('StorageUnit', phs.index, carrier='PHS', bus=phs['bus'], p_nom=phs['p_nom'], capital_cost=costs.at['PHS', 'capital_cost'], max_hours=phs['max_hours'], efficiency_store=np.sqrt(costs.at['PHS', 'efficiency']), efficiency_dispatch=np.sqrt(costs.at['PHS', 'efficiency']), cyclic_state_of_charge=True) if 'hydro' in carriers and not hydro.empty: hydro_max_hours = c.get('hydro_max_hours') hydro_stats = pd.read_csv(snakemake.input.hydro_capacities, comment="#", na_values='-', index_col=0) e_target = hydro_stats["E_store[TWh]"].clip(lower=0.2) * 1e6 e_installed = hydro.eval('p_nom * max_hours').groupby( hydro.country).sum() e_missing = e_target - e_installed missing_mh_i = hydro.query('max_hours == 0').index if hydro_max_hours == 'energy_capacity_totals_by_country': # watch out some p_nom values like IE's are totally underrepresented max_hours_country = e_missing / \ hydro.loc[missing_mh_i].groupby('country').p_nom.sum() elif hydro_max_hours == 'estimate_by_large_installations': max_hours_country = hydro_stats['E_store[TWh]'] * 1e3 / \ hydro_stats['p_nom_discharge[GW]'] missing_countries = (pd.Index(hydro['country'].unique()).difference( max_hours_country.dropna().index)) if not missing_countries.empty: logger.warning( "Assuming max_hours=6 for hydro reservoirs in the countries: {}" .format(", ".join(missing_countries))) hydro_max_hours = hydro.max_hours.where( hydro.max_hours > 0, hydro.country.map(max_hours_country)).fillna(6) n.madd( 'StorageUnit', hydro.index, carrier='hydro', bus=hydro['bus'], p_nom=hydro['p_nom'], max_hours=hydro_max_hours, capital_cost=(costs.at['hydro', 'capital_cost'] if c.get('hydro_capital_cost') else 0.), marginal_cost=costs.at['hydro', 'marginal_cost'], p_max_pu=1., # dispatch p_min_pu=0., # store efficiency_dispatch=costs.at['hydro', 'efficiency'], 
efficiency_store=0., cyclic_state_of_charge=True, inflow=inflow_t.loc[:, hydro.index])
def transform(da, transform_type, workdir): """Transform data to be more normal using either boxcox or log transform. The transform is performed separately for each month, since the regression model is fit for each month. Parameters ---------- da : xarray.DataArray Untransformed dataarray transform_type : str 'boxcox' or 'log' workdir : str Where to save the boxcox parameters Returns ------- ds_t : xarray.DatArray Transformed dataarray """ # Set all non-positive precip values to trace tmp = da.values tmp[tmp <= 0] = 1e-24 da.values = tmp if transform_type == 'boxcox': lam_save_name = '%s/boxcox_lambda.nc' % workdir if os.path.isfile(lam_save_name): da_lam = xr.open_dataarray(lam_save_name) else: ntime, nlat, nlon = da.shape box_lam = np.nan * np.ones((12, nlat, nlon)) for mo in range(1, 13): print('calculating lambda for month %i' % mo) for ct1 in range(nlat): for ct2 in range(nlon): this_ts = da.isel({ 'time': da['time.month'] == mo, 'lat': ct1, 'lon': ct2 }) if (np.isnan((this_ts.values).astype(float))).all(): continue _, lam = boxcox(this_ts) box_lam[mo - 1, ct1, ct2] = np.min( (lam, 1) ) # set ceiling at 1, since pr is positively skewed # save to netcdf da_lam = xr.DataArray(data=box_lam, dims=('month', 'lat', 'lon'), coords={ 'month': np.arange(1, 13), 'lat': da.lat, 'lon': da.lon }) da_lam.to_netcdf(lam_save_name) # transform data, separately for each month da_t = [] for mo in range(1, 13): x_t = boxcox_forward(da.sel({'time': da['time.month'] == mo}), da_lam.sel({'month': mo})) da_t.append(x_t) da_t = xr.concat(da_t, dim='time') da_t = da_t.sortby('time') elif transform_type == 'log': da_t = np.log(da) else: raise NotImplementedError( 'No other transforms besides Box-Cox and log') return da_t
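# Sketch of the Box-Cox forward transform that `transform` relies on above.
# The real `boxcox_forward` helper is not part of this snippet, so this is an
# assumption based on the standard one-parameter Box-Cox definition, with the
# per-gridpoint lambda fitted by scipy.stats.boxcox:
def boxcox_forward_sketch(x, lam):
    # y = (x**lam - 1) / lam for lam != 0, and y = log(x) for lam == 0
    return xr.where(lam != 0, (x ** lam - 1) / lam, np.log(x))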
# remove this dimension da = da.squeeze() if not load_data_lazily: da.load() combined_ds.append(da) return combined_ds # lazily load the whole dataset ds_whole = read_e5_data(test_startyear, test_endyear, variables=variables) # this is now a lazy dask array. do not do any operations on this array outside the data generator below. # if we do operations before, it will severly slow down the data loading throughout the training. # load normalization weights norm_mean = xr.open_dataarray(norm_weights_filenamebase + '_mean.nc').values norm_std = xr.open_dataarray(norm_weights_filenamebase + '_std.nc').values n_data = ds_whole[0].shape[0] N_train = n_data // time_resolution_hours n_valid = int(N_train * valid_split) Nlat, Nlon, = ds_whole[0].shape[1:3] Nlat = Nlat // 2 # only NH n_channels_out = len(variables) n_channels_in = n_channels_out param_string = f'{modelname}_{train_startyear}-{train_endyear}'
"/global/cscratch1/sd/qnicolas/wrfdata/saved/gw.wrf.2D.60lev.500m.3km/wrfout_d01_1970-01-01_00_00_00" ) x_kwargs = {'center': 314, 'flip_x': False, 'dx': 10} ghats_topo = change_coords_sfc(ghats_ideal.HGT[0, 0], **x_kwargs) # Define nb frames print("Total nb of times: ", len(ghats_ideal.Time)) nframes = len(ghats_ideal.Time) time_disc = 24 # frames per day fig, ax = plt.subplots(1, 1, figsize=(15, 5)) # Animate if movietype == 'isentropes': ghats_ideal_theta_z = xr.open_dataarray( "/global/cscratch1/sd/qnicolas/wrfdata/saved/gw.wrf.2D.60lev.500m.3km/diags/wrf.THETA.zinterp.days0-10.nc" )[:, 2:] levels = np.array( ghats_ideal_theta_z.isel( Time=0, distance_from_mtn=0).sel(z=np.arange(1000., 20000., 1000.))) def update(i): print(i) ax.cla() ghats_topo.plot(ax=ax, color='k', linewidth=2.) ghats_ideal_theta_z.isel(Time=i).plot.contour(ax=ax, levels=levels) ax.set_ylim(0., 20000.) ax.set_xlabel("Distance from mountain peak (km)") ax.set_ylabel("Altitude (m)") ax.set_title("Isentropes, Time = %i days %02i h" %
sample_coor_m = sample_coor.resample(time='MS').sum() out_nc = Path(r'H:\CMIP6 - Biased\pr_gamma\nc') #%% for l in ls: bias_p = sorted(list(l.iterdir())) print(l.name) print('------------') for path in bias_p: print(path.name) cor = pd.read_csv(list(Path(path).iterdir())[0], header=None) # print(cor.shape) cor_nc = to_xarray(cor, sample_coor_m.coords) cor_nc.to_netcdf( ut.save_file(out_nc / l.name / ('Biased_' + path.name + '_2015_2100.nc'))) # open_and_format([sce585_path[3], sce585_path[-3]]) #%% nc_path = sorted(list(Path(r'H:\CMIP6 - Biased\pr_gamma\nc\ssp245').iterdir())) mf_ds_arr = [ xr.open_dataarray(p).assign_coords(id=i + 1) for i, p in enumerate(nc_path) ] mf_ds = xr.concat(mf_ds_arr, dim='id') #%% ut.sim_plot(mf_ds.isel(time=0), col='id', col_wrap=4, add_colorbar=False) #%% near_p = Path(r'H:\CMIP6 - Test\cdbc\new\Bias Corrected Rainfall 2015.csv') cor = pd.read_csv(near_p, header=None) cor_nc2 = to_xarray(cor, ut.select_year(sample_coor_m, 2015, 2031).coords)
def main(): # Parse arguments parser = argparse.ArgumentParser() parser.add_argument("config", help="Name of the config file.") parser.add_argument("-t", "--train", action="store_true", help="Run neural network training.") parser.add_argument("-i", "--interp", action="store_true", help="Run interpretation.") parser.add_argument("-p", "--plot", action="store_true", help="Plot interpretation results.") args = parser.parse_args() if not exists(args.config): raise FileNotFoundError(args.config + " not found.") with open(args.config, "r") as config_file: config = yaml.load(config_file, Loader=yaml.Loader) # Load training data print( f"Loading training data period: {config['train_start_date']} to {config['train_end_date']}" ) data_input = {} output = {} out_max = {} labels = {} meta = {} meta_df = {} input_combined = {} input_scaled = {} scale_values = {} predictions = {} modes = ["train", "val", "test"] # Load training, validation, and testing data for mode in modes: data_input[mode], output[mode], meta[mode] = load_patch_files( config[mode + "_start_date"], config[mode + "_end_date"], config["data_path"], config["input_variables"], config["output_variables"], config["meta_variables"], config["patch_radius"]) input_combined[mode] = combine_patch_data(data_input[mode], config["input_variables"]) if mode == "train": input_scaled[mode], scale_values[mode] = min_max_scale( input_combined[mode]) else: input_scaled[mode], scale_values[mode] = min_max_scale( input_combined[mode], scale_values["train"]) out_max[mode] = storm_max_value( output[mode][config["output_variables"][0]], meta[mode]["masks"]) meta_df[mode] = get_meta_scalars(meta[mode]) print(meta_df[mode].columns) if config["classifier"]: labels[mode] = np.where( out_max[mode] >= config["classifier_threshold"], 1, 0) else: labels[mode] = out_max[mode] if not exists(config["out_path"]): makedirs(config["out_path"]) scale_values["train"].to_csv(join(config["out_path"], "scale_values.csv"), index_label="variable") if "get_visible_devices" in dir(tf.config.experimental): gpus = tf.config.experimental.get_visible_devices("GPU") else: gpus = tf.config.get_visible_devices("GPU") for device in gpus: tf.config.experimental.set_memory_growth(device, True) models = {} neuron_activations = {} neuron_scores = {} saliency = {} if args.train: print("Begin model training") for mode in modes: predictions[mode] = pd.DataFrame(0, index=meta_df[mode].index, columns=list( config["models"].keys())) predictions[mode] = pd.merge(meta_df[mode], predictions[mode], left_index=True, right_index=True) for model_name, model_config in config["models"].items(): model_out_path = join(config["out_path"], model_name) if not exists(model_out_path): makedirs(model_out_path) scale_values["train"].to_csv(join( model_out_path, "scale_values_" + model_name + ".csv"), index_label="variable") models[model_name] = BaseConvNet(**model_config) models[model_name].fit(input_scaled["train"].values, labels["train"], val_x=input_scaled["val"].values, val_y=labels["val"]) models[model_name].save_model(model_out_path, model_name) for mode in modes: predictions[mode].loc[:, model_name] = models[model_name].predict( input_scaled[mode].values) for mode in modes: predictions[mode].to_csv(join(config["out_path"], f"predictions_{mode}.csv"), index_label="index") print("Calculate metrics") if config["classifier"]: model_scores = classifier_metrics( labels["test"], predictions["test"][list(config["models"].keys())]) model_scores.to_csv(join(config["out_path"], "model_test_scores.csv"), 
index_label="model_name") if args.interp: for model_name, model_config in config["models"].items(): if model_name not in models.keys(): model_out_path = join(config["out_path"], model_name) models[model_name] = load_conv_net(model_out_path, model_name) neuron_columns = [ f"neuron_{n:03d}" for n in range(models[model_name].dense_neurons) ] neuron_activations[model_name] = {} neuron_scores[model_name] = pd.DataFrame(0, columns=neuron_columns, index=modes) saliency[model_name] = {} for mode in modes: neuron_activations[model_name][mode] = pd.merge( meta_df[mode], pd.DataFrame(0, columns=neuron_columns, index=meta_df[mode].index), left_index=True, right_index=True) neuron_activations[model_name][ mode].loc[:, neuron_columns] = models[ model_name].output_hidden_layer( input_scaled[mode].values) neuron_activations[model_name][mode].to_csv( join(config["out_path"], f"neuron_activations_{model_name}_{mode}.csv"), index_label="index") saliency[model_name][mode] = models[model_name].saliency( input_scaled[mode]) saliency[model_name][mode].to_netcdf( join(config["out_path"], f"neuron_saliency_{model_name}_{mode}.nc"), encoding={ "saliency": { "zlib": True, "complevel": 4, "shuffle": True, "least_significant_digit": 3 } }) if config["classifier"]: neuron_scores[model_name].loc[mode] = score_neurons( labels[mode], neuron_activations[model_name][mode] [neuron_columns].values) else: neuron_scores[model_name].loc[mode] = score_neurons( labels[mode], neuron_activations[model_name][mode] [neuron_columns].values, metric="r") neuron_scores[model_name].to_csv(join( config["out_path"], f"neuron_scores_{model_name}.csv"), index_label="mode") if args.plot: print("Begin plotting") if "plot_kwargs" not in config.keys(): config["plot_kwargs"] = {} for model_name, model_config in config["models"].items(): print(model_name) if model_name not in models.keys(): model_out_path = join(config["out_path"], model_name) models[model_name] = load_conv_net(model_out_path, model_name) neuron_activations[model_name] = {} neuron_scores[model_name] = pd.read_csv(join( config["out_path"], f"neuron_scores_{model_name}.csv"), index_col="mode") saliency[model_name] = {} for mode in modes: print(mode) if mode not in neuron_activations[model_name].keys(): neuron_activations[model_name][mode] = pd.read_csv( join(config["out_path"], f"neuron_activations_{model_name}_{mode}.csv"), index_col="index") saliency[model_name][mode] = xr.open_dataarray( join(config["out_path"], f"neuron_saliency_{model_name}_{mode}.nc")) for variable_name in config["input_variables"]: print(variable_name) if variable_name not in config["plot_kwargs"].keys(): plot_kwargs = None else: plot_kwargs = config["plot_kwargs"][variable_name] plot_neuron_composites( config["out_path"], model_name + "_" + mode, input_combined[mode], neuron_activations[model_name][mode].values, neuron_scores[model_name].loc[mode].values, variable_name, plot_kwargs=plot_kwargs) plot_saliency_composites( config["out_path"], model_name + "_" + mode, saliency[model_name][mode], neuron_activations[model_name][mode].values, neuron_scores[model_name].loc[mode].values, variable_name) plot_top_activations( config["out_path"], model_name + "_" + mode, input_combined[mode], meta_df[mode], neuron_activations[model_name][mode], neuron_scores[model_name].loc[mode].values, saliency[model_name][mode], variable_name, plot_kwargs=plot_kwargs) return
def arima_and_ystar(acause, agg_version, arima_version, smoothing, years, measure, intercept_shift, gbd_round_id, draws, decay, dryrun=False, no_correction=False, past_version="best", no_arima=False, **kwargs): r"""Samples mortality residuals from an ARIMA and forms $y^* = \hat{y} + \hat{\epsilon}$. :param str acause: name of the target acause to aggregate to. :param str agg_version: name of the aggregate version. :param str arima_version: name of the arima version. :param list[str] smoothing: what dimensions to smooth over during the ARIMA step. :param fbd_core.argparse.YearRange years: a container for the three years which define our forecast. :param int draws: number of draws to take. :param bool dryrun: dryrun flag. This is a test run if True. :param bool bias: Perform log bias correction. """ logger.debug("Opening: {}".format(FILEPATH)) y_hat = xr.open_dataarray(str(FILEPATH)) # GK intercept shift y_hat = gis.intercept_shift_at_draw(y_hat, acause, past_version, gbd_round_id, years, draws) save_xr(y_hat, FILEPATH, root_dir="scratch", metric="rate", space="log") y_past = _get_y_past(acause, years, measure, gbd_round_id, past_version=past_version) past_years = years.past_years if not no_arima: # ARIMA for everything except NTDs logger.info("Computing epsilon_past.") epsilon_past_with_scenarios_and_draws = ( y_past.loc[dict(year_id=past_years)] - y_hat.loc[dict(year_id=past_years)]) epsilon_past = epsilon_past_with_scenarios_and_draws.loc[dict( scenario=0)].mean("draw") try: epsilon_hat = xr.open_dataarray(str(FILEPATH)) except: epsilon_hat = _draw_epsilons(epsilon_past, draws, smoothing, years, acause, decay, gbd_round_id=gbd_round_id) if not dryrun: logger.info("Saving epsilon_hat to {}".format(FILEPATH)) _save_netcdf(epsilon_hat, FILEPATH) y_star = _get_y_star(y_hat, epsilon_hat, years).copy() else: # no arima for ntds y_star = y_hat y_star.name = "value" # intercept shift and bias if intercept_shift: y_star = _intercept_shift(acause, y_star, years, measure, gbd_round_id, draws=draws, no_arima=no_arima, past_version=past_version) if not no_correction: y_star = xr.ufuncs.log(bias_exp(y_star)) if not dryrun: logger.info("Saving y_star to {}".format(FILEPATH)) _save_netcdf(y_star, FILEPATH)
def _get_modeled_y_hat(acause, version, measure, period, gbd_round_id, draws): """Gets mortality data for a modeled acause. For modeled causes, if the data is split by sex, then it is assumed that it is in log rate space. If the data is not split by sex, then it is assumed that it is in normal rate space. :param str acause: acause for a modeled acause. :param str version: name of the mortality or yld version which modeled this acauaArray: the mortality or yld data for acause. """ if period == "past": input_file = FILEPATH / "{}.nc".format(acause) y_hat_exp = xr.open_dataset(str(input_file))["value"] + FLOOR y_hat_exp = resample(y_hat_exp, draws) y_hat = xr.ufuncs.log(y_hat_exp) y_hat.coords["acause"] = acause else: try: logger.info( "No children. y_hat is from mort/yld file {}".format(FILEPATH)) # Because the data is modeled and not split by sex, it is saved in # normal rate space. Log it. y_hat_exp = xr.open_dataarray(str(FILEPATH)) y_hat_exp = resample(y_hat_exp, draws) y_hat = xr.ufuncs.log(y_hat_exp + FLOOR) # some of the yld files are missing acause, so add that info y_hat.coords["acause"] = acause except IOError: # Modeled data is split by sex. input_files = [ FILES for FILES in POTENTIAL_FILES if FILES.exists() ] logger.info("Input results are split by sex. Files are {}".format( input_files)) if len(input_files) == 1: logger.info("This is a sex specific cause. Gotta give it a " "real coordinate on sex.") if "female" in input_files[0].as_posix(): sex_id = 2 else: sex_id = 1 dataarray_one_sex = xr.open_dataarray( str(input_files[0]), drop_variables=["measure", "cov"]) dataarray_one_sex = resample(dataarray_one_sex, draws) new_vals = np.expand_dims(dataarray_one_sex.values, 0) new_dims = ["sex_id"] + list(dataarray_one_sex.dims) logger.info("New dimensions: {}".format(new_dims)) new_coords = ([[sex_id]] + [ coord.values for coord in list( dataarray_one_sex.coords.indexes.values()) ]) y_hat = xr.DataArray( new_vals, dims=new_dims, coords=new_coords).to_dataset(name="value") y_hat.coords["acause"] = acause elif len(input_files) == 2: y_hat = xr.open_mfdataset( [str(input_file) for input_file in input_files], concat_dim="sex_id", drop_variables=["measure", "cov"]) y_hat = resample(y_hat[list(y_hat.data_vars.keys())[0]], draws) else: logger.error(( "{} has no modeled mortality/ylds for version {}. ruh-roh." ).format(acause, version)) raise Exception("Modeled acause has no saved results.") # if data are split by sex, they are in log space. convert back to # regular space to add the floor y_hat = xr.ufuncs.log(xr.ufuncs.exp(y_hat) + FLOOR) return y_hat
def _get_aggregated_y_hat(acause, version, measure, period, gbd_round_id): """Gets expected value of cause specific mortality rates. For aggregate causes, it is assumed that the data is not split by sex and is saved in log rate space. When the children are added to form the aggregated acause result, the summation happens in normal space. Therefore, we must exponentiate the children's rates, add them up, and log them to get an aggregated y_hat in log rate space. The resulting y_hat is in log rate space. :param str acause: name of the target acause to aggregate to. :param str version: name of the aggregation version. :return xarray.DataArray: The expected value of the cause specific mortality rate. """ # connect to db and read in cause hierarchy engine = db.db_engine(NAME, database=DATABASE) session = sessionmaker(bind=engine)() all_causes = get_hierarchy(session, "cause", CAUSE_HIERARCHY_ID)[[ "acause", "cause_id", "parent_id" ]] # subset to just fatal causes cause_strategy_set = get_strategy_set(session, FATAL_GK_STRATEGY_ID, CAUSE_HIERARCHY_ID) cause_hierarchy = get_hierarchy(session, "cause", CAUSE_HIERARCHY_ID) cause_tree, node_map = subset_fatal.make_hierarchy_tree( cause_hierarchy, 294, "cause_id") fatal_subset = subset_fatal.include_up_hierarchy( cause_tree, node_map, cause_strategy_set["cause_id"].values) fatal_causes = all_causes[all_causes.cause_id.isin(fatal_subset)] cause_id = fatal_causes[fatal_causes.acause == acause].cause_id.values[0] children = fatal_causes.query( "parent_id == {}".format(cause_id))["acause"].values logger.info("y_hat is a sum of children: {}".format(children)) # Create a list of child acause files which are not external causes and # check to make sure all the ones we want to sum up are actually present. potential_child_files = [ FBDPath("/{gri}/{p}/{m}/{v}/{c}_hat.nc".format(gri=gbd_round_id, p=period, m=measure, v=version, c=child), root_dir="scratch") for child in children if child not in ("_all", "_none") ] child_files = [ str(child_file) for child_file in potential_child_files if child_file.exists() ] if len(potential_child_files) != len(child_files): logger.error("You are missing files, bud. {} vs {}".format( potential_child_files, child_files)) raise Exception("Missing y_hat files!") logger.debug("Summing these files: {}".format(child_files)) exp_y_hat_sum = None for child_file in child_files: logger.info("Adding {}".format(child_file)) exp_y_hat = xr.ufuncs.exp( xr.open_dataarray(child_file, drop_variables=["measure", "cov"])) if exp_y_hat_sum is None: exp_y_hat_sum = exp_y_hat else: exp_y_hat_broadcasted = xr.broadcast(exp_y_hat_sum, exp_y_hat) exp_y_hat_broadcasted = [ data.fillna(0.) for data in exp_y_hat_broadcasted ] exp_y_hat_sum = sum(exp_y_hat_broadcasted) y_hat = xr.ufuncs.log(exp_y_hat_sum) y_hat.coords["acause"] = acause return y_hat
def setUp(self):
    self.algorithm = NewAssimilation()
    state_path = os.path.join(DATA_PATH, 'test_state.nc')
    self.state = xr.open_dataarray(state_path)
    obs_path = os.path.join(DATA_PATH, 'test_single_obs.nc')
    self.obs = xr.open_dataset(obs_path)
def trend_all(): srfc = cnst.ERA5_MONTHLY_SRFC_SYNOP #cnst.ERA_MONTHLY_SRFC_SYNOP pl = cnst.ERA5_MONTHLY_PL_SYNOP #cnst.ERA_MONTHLY_PL_SYNOP mcs = cnst.GRIDSAT + 'aggs/gridsat_WA_-70_monthly_mean_5000km2.nc' fpath = cnst.network_data + 'figs/CLOVER/months/ERA5_WA/' box = [-18, 30, 0, 25] # [-18,40,0,25] # da = xr.open_dataset(pl) #xr.open_dataset(pl) #da = xr.decode_cf(da) da = u_darrays.flip_lat(da) da = da.sel(longitude=slice(box[0], box[1]), latitude=slice(box[2], box[3])) da2 = xr.open_dataset(srfc) #xr.open_dataset(srfc) #da2 = xr.decode_cf(da2) da2 = u_darrays.flip_lat(da2) da2 = da2.sel(longitude=slice(box[0], box[1]), latitude=slice(box[2], box[3])) da3 = xr.open_dataarray(mcs) * 100 da3 = da3.sel(lon=slice(box[0], box[1]), lat=slice(box[2], box[3])) lons = da.longitude lats = da.latitude press = da2['tcwv'] press = press[press['time.hour'] == 12] #press.values = press.values#*1000 low_press = 925 up_press = 650 mid_press = 700 q = da['q'].sel(level=slice(low_press - 20, low_press)).mean('level') q = q[q['time.hour'] == 12] t2d = da2['t2m'] #['t2m'] #t2d = da['t'].sel(level=slice(800, 850)).mean('level') t2d = t2d[t2d['time.hour'] == 12] sh = da2['sshf'] sh = sh[sh['time.hour'] == 12] / -3600 # theta_low = u_met.theta_e(da.level.values, da['t'].sel(level=low_press), da['q'].sel(level=low_press)) # theta_high = u_met.theta_e(da.level.values, da['t'].sel(level=mid_press), da['q'].sel(level=mid_press)) # # theta_e = theta_low - theta_high u600 = da['u'].sel(level=slice(up_press - 20, up_press)).mean('level') u600 = u600[u600['time.hour'] == 12] v600 = da['v'].sel(level=slice(up_press - 20, up_press)).mean('level') v600 = v600[v600['time.hour'] == 12] ws600 = u_met.u_v_to_ws_wd(u600, v600) u800 = da['u'].sel(level=slice(low_press - 20, low_press)).mean('level') u800 = u800[u800['time.hour'] == 12] v800 = da['v'].sel(level=slice(low_press - 20, low_press)).mean('level') v800 = v800[v800['time.hour'] == 12] shear_u = u600 - u800 #u600- shear_v = v600 - v800 # v600- ws_shear = u_met.u_v_to_ws_wd(shear_u.values, shear_v.values) ws_600 = t2d.copy(deep=True) ws_600.name = 'ws' ws_600.values = ws600[0] # shear = t2d.copy(deep=True) # shear.name = 'shear' # shear.values = ws_shear[0] shear = sh u6 = shear_u #u800 v6 = shear_v #v800 q.values = q.values * 1000 grid = t2d.salem.grid.regrid(factor=1) t2 = t2d # grid.lookup_transform(t2d) tir = grid.lookup_transform(da3) #t2d.salem.lookup_transform(da3['tir']) # grid = grid.to_dataset() tir = xr.DataArray(tir, coords=[da3['time'], grid['y'], grid['x']], dims=['time', 'latitude', 'longitude']) months = [ 4, (3, 5), (6, 8), (9, 11) ] #[3,4,5,6,9,10,11]#,4,5,6,9,10,11#,4,5,6,9,10,11,(3,5), (9,11)]#, 10,5,9]#[(12,2)]#[1,2,3,4,5,6,7,8,9,10,11,12]# #,2,3,11,12]#[(12,2)]#[1,2,3,4,5,6,7,8,9,10,11,12]# #,2,3,11,12] dicm = {} dicmean = {} for m in months: method = 'mk' if type(m) == int: m = [m] sig = True t2trend, t2mean = calc_trend(t2, m, method=method, sig=sig, hour=12, wilks=False) #hour=12, t2_mean = t2mean.mean(axis=0) tirtrend, tirmean = calc_trend(tir, m, method=method, sig=sig, wilks=False) tirm_mean = tirmean.mean(axis=0) qtrend, qmean = calc_trend(q, m, method=method, sig=sig, hour=12, wilks=False) #hour=12, q_mean = qmean.mean(axis=0) sheartrend, shearmean = calc_trend(shear, m, method=method, sig=sig, hour=12, wilks=False) #hour=12, shear_mean = shearmean.mean(axis=0) #ipdb.set_trace() presstrend, pressmean = calc_trend(press, m, method=method, sig=sig, hour=12, wilks=False) #hour=12, press_mean = pressmean.mean(axis=0) u6trend, u6mean = 
calc_trend(u6, m, method=method, sig=sig, hour=12, wilks=False) #hour=12, u6_mean = u6mean.mean(axis=0) v6trend, v6mean = calc_trend(v6, m, method=method, sig=sig, hour=12, wilks=False) #hour=12, v6_mean = v6mean.mean(axis=0) # thetatrend, thetamean = calc_trend(theta_e, m, method=method, sig=sig, hour=12,wilks=False) #hour=12, # theta_mean = thetamean.mean(axis=0) t2trend_unstacked = t2trend * 10. # warming over decade qtrend_unstacked = qtrend * 10. # warming over decade sheartrend_unstacked = sheartrend * 10. # warming over decade u6trend_unstacked = u6trend * 10 v6trend_unstacked = v6trend * 10 presstrend_unstacked = presstrend * 10 # thetatrend_unstacked = thetatrend * 10 tirtrend_unstacked = ( (tirtrend.values) * 10. / tirm_mean.values) * 100. #ipdb.set_trace() tirtrend_out = xr.DataArray(tirtrend_unstacked, coords=[grid['y'], grid['x']], dims=['latitude', 'longitude']) tirtrend_out.name = 'tir' #tirmean_out = xr.DataArray(tirm_mean, coords=[grid['y'], grid['x']], dims=['latitude','longitude']) dicm[m[0]] = tirtrend_out dicmean[m[0]] = tirm_mean t_da = t2trend_unstacked q_da = qtrend_unstacked s_da = sheartrend_unstacked ti_da = tirtrend_out tcwv_da = presstrend_unstacked # theta_da = thetatrend_unstacked if len(m) == 1: fp = fpath + 'use/ERA5_trend_synop_WA_sig_poly_tcwv_1991_skt_' + str( m[0]).zfill(2) + '.png' else: fp = fpath + 'use/ERA5_trend_synop_WA_sig_poly_tcwv_1991_skt_' + str( m[0]).zfill(2) + '-' + str(m[1]).zfill(2) + '.png' map = shear.salem.get_map() ti_da = t2d.salem.transform(ti_da) f = plt.figure(figsize=(15, 8), dpi=300) # transform their coordinates to the map reference system and plot the arrows xx, yy = map.grid.transform(shear.longitude.values, shear.latitude.values, crs=shear.salem.grid.proj) xx, yy = np.meshgrid(xx, yy) #Quiver only every 7th grid point u = u6trend_unstacked.values[1::2, 1::2] v = v6trend_unstacked.values[1::2, 1::2] #Quiver only every 7th grid point uu = u6_mean.values[1::2, 1::2] vv = v6_mean.values[1::2, 1::2] xx = xx[1::2, 1::2] yy = yy[1::2, 1::2] pdic = { 'tlin': (t2_mean.values - 273.15).astype(np.float64), 'tmean': (t2_mean.values - 273.15).astype(np.float64), 'qmean': (q_mean.values).astype(np.float64), 'qlin': q_da.values, 'shearlin': s_da.values, 'u': u, 'v': v, 'xx': xx, 'yy': yy, 'tirmean': tirm_mean, } pkl.dump( dicm, open( cnst.network_data + 'data/CLOVER/saves/storm_frac_synop12UTC_WA.p', 'wb')) ax1 = f.add_subplot(221) map.set_data(t_da.values, interp='linear') # interp='linear' map.set_contour((t2_mean.values - 273.15).astype(np.float64), interp='linear', colors='k', linewidths=0.5, levels=[20, 23, 26, 29, 32, 35]) map.set_plot_params( levels=[-0.5, -0.4, -0.3, -0.2, 0.2, 0.3, 0.4, 0.5], cmap='RdBu_r', extend='both') # levels=np.arange(-0.5,0.51,0.1), #map.set_contour((t2_mean.values).astype(np.float64), interp='linear', colors='k', linewidths=0.5, levels=np.linspace(800,925,8)) #map.set_plot_params(levels=[-0.5,-0.4,-0.3,-0.2,-0.1,-0.05,-0.02, 0.02,0.05,0.1,0.2,0.3,0.4,0.5], cmap='RdBu_r', extend='both') # levels=np.arange(-0.5,0.51,0.1), dic = map.visualize(ax=ax1, title='2m temperature trend | contours: mean T', cbar_title='K decade-1') contours = dic['contour'][0] plt.clabel(contours, inline=True, fontsize=7, fmt='%1.1f') ax2 = f.add_subplot(222) map.set_data(tcwv_da.values, interp='linear') # interp='linear' map.set_contour((press_mean.values).astype(np.float64), interp='linear', colors='k', levels=[20, 30, 40, 50, 60], linewidths=0.5) #[6,8,10,12,14,16] map.set_plot_params( levels=[-1.2, -1, -0.8, -0.6, -0.4, 0.4, 0.6, 
0.8, 1, 1.2], cmap='RdBu', extend='both' ) # levels=np.arange(-0.5,0.51,0.1), [-0.6,-0.4,-0.2,0.2,0.4,0.6] dic = map.visualize( ax=ax2, title='925hPa Spec. humidity trend | contours: mean q', cbar_title='g kg-1 decade-1') contours = dic['contour'][0] plt.clabel(contours, inline=True, fontsize=7, fmt='%1.1f') ax3 = f.add_subplot(223) map.set_data(s_da.values, interp='linear') # interp='linear' map.set_contour(shear_mean.values, interp='linear', colors='k', levels=np.arange(10, 150, 8)) #, levels=np.arange(50,300,8), map.set_plot_params(levels=np.array( [-0.8, -0.6, -0.4, -0.2, -0.1, 0.1, 0.2, 0.4, 0.6, 0.8]) * 10, cmap='RdBu_r', extend='both') # levels=np.arange(-0.5,0.51,0.1) dic = map.visualize(ax=ax3, title='Sensible heat flux trend and mean', cbar_title='W m-2 decade-1') contours = dic['contour'][0] plt.clabel(contours, inline=True, fontsize=7, fmt='%1.1f') # qu = ax3.quiver(xx, yy, u, v, scale=60, width=0.002) # # qk = plt.quiverkey(qu, 0.4, 0.03, 1, '1 m s$^{-1}$', # labelpos='E', coordinates='figure') ax4 = f.add_subplot(224) map.set_contour((tirm_mean), interp='linear', levels=[0.1, 0.5, 1, 2.5], colors='k', linewidths=0.5) #.values).astype(np.float64) ti_da.values[ti_da.values == 0] = np.nan map.set_data(ti_da) # coord = [18, 25, -28, -20] geom = shpg.box(coord[0], coord[2], coord[1], coord[3]) #map.set_geometry(geom, zorder=99, color='darkorange', linewidth=3, linestyle='--', alpha=0.3) map.set_plot_params( cmap='viridis', extend='both', levels=np.arange( 10, 51, 10)) # levels=np.arange(20,101,20) #np.arange(20,101,20) dic = map.visualize(ax=ax4, title='-70C cloud cover change | >5000km2', cbar_title='$\%$ decade-1', addcbar=True) contours = dic['contour'][0] plt.clabel(contours, inline=True, fontsize=7, fmt='%1.1f') plt.tight_layout() plt.savefig(fp) plt.close('all') pkl.dump( dicm, open( cnst.network_data + 'data/CLOVER/saves/storm_frac_synop12UTC_WA.p', 'wb')) pkl.dump( dicmean, open( cnst.network_data + 'data/CLOVER/saves/storm_frac_mean_synop12UTC_WA.p', 'wb'))
def dissertation_plot_contour_by_group_individual_bar(setting, z_dim="annual_roi", mkt='TW'): # verify setting if setting not in ("compact", "general"): raise ValueError("unknown setting: {}".format(setting)) # verify z_dim if z_dim not in ('SPA_c', 'daily_VSS', 'annual_roi'): raise ValueError('unknown z_dim:{}'.format(z_dim)) # parameters start_date, end_date = dt.date(2005, 1, 3), dt.date(2018, 12, 28) interval = "{}_{}".format(start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")) max_portfolio_sizes = (5, ) window_sizes = range(50, 240 + 10, 10) # alpha alpha_pcts = [v for v in range(50, 100, 5)] # alphas = ["{:.2f}".format(v / 100.) for v in range(50, 100, 5)] # set_indices = [1, ] set_indices = [1, 2, 3] name = "report_SPSP_CVaR_whole_dissertation_{}_{}_{}.nc".format( setting, start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")) # read report file xarr = xr.open_dataarray(os.path.join(pp.DATA_DIR, name)) print(xarr) import matplotlib as mpl import matplotlib.pyplot as plt # set global font plt.rcParams['font.family'] = 'serif' plt.rcParams['font.serif'] = (['Times New Roman'] + plt.rcParams['font.serif']) # figure size in inches fig = plt.figure(figsize=(16, 12), facecolor='white') # alpha xlim = (50, 95) # rolling window size ylim = (50, 240) group_names = ['{}G{}'.format(mkt, idx + 1) for idx in range(6)] for gdx, group_name in enumerate(group_names): # x-axis, alpha, y-axis: window_sizes ax = fig.add_subplot(2, 3, gdx + 1, xlim=xlim, ylim=ylim) ax.set_title(group_name, y=1.02, fontsize=18) # labelpad - number of points between the axis and its label ax.set_xlabel(r'$\alpha$', fontsize=14, labelpad=-2) ax.set_ylabel(r'$h$', fontsize=14, labelpad=-2) ax.tick_params(labelsize=10, pad=1) ax.set_xticks(alpha_pcts) ax.set_xticklabels(alpha_pcts, fontsize=10) ax.set_yticks(window_sizes) ax.set_yticklabels(window_sizes, fontsize=10) # X: alpha_pcts, Y: window size Xs, Ys = np.meshgrid(alpha_pcts, window_sizes) Zs = np.zeros_like(Xs, dtype=np.float) n_row, n_col = Xs.shape # get z-value for rdx in range(n_row): for cdx in range(n_col): alpha, win_size = Xs[rdx, cdx], Ys[rdx, cdx] z_values = xarr.loc[interval, group_name, set_indices, # all scenarios 5, win_size, "{:.2f}".format(alpha / 100.), z_dim] mean = z_values.mean() if z_dim == 'daily_VSS': Zs[rdx, cdx] = float(mean) * 1e5 else: Zs[rdx, cdx] = float(mean) * 1e2 # print(Zs) lower, high = np.floor(np.min(Zs)), np.ceil(np.max(Zs)) print(group_name, " z_range:", lower, high) if z_dim == 'annual_roi': for _ in range(4): if (lower * 10) % 4: lower -= 0.1 for _ in range(4): if (high * 10) % 4: high += 0.1 print(group_name, "fixed z_range:", lower, high) cm_norm = mpl.colors.Normalize(vmin=lower - 0.1, vmax=high + 0.1, clip=False) color_range = np.arange(lower, high, 0.4) elif z_dim == 'daily_VSS': print('z_dim:', z_dim) cm_norm = mpl.colors.Normalize(vmin=lower - 0.5, vmax=high + 0.5, clip=False) color_range = np.arange(lower, high) elif z_dim == 'SPA_c': print('z_dim:', z_dim) Zs[Zs > 10] = 11 cm_norm = mpl.colors.Normalize(vmin=0, vmax=12, clip=False) color_range = np.arange(0, 12) # contour, projecting on z cset = ax.contourf(Xs, Ys, Zs, cmap=plt.cm.coolwarm, norm=cm_norm, levels=color_range) # color bar if z_dim == 'annual_roi': cbar = fig.colorbar(cset, ax=ax) cbar.ax.tick_params(labelsize=12) cbar_label_name = "Annual return (%)" elif z_dim == 'daily_VSS': cbar = fig.colorbar(cset, ax=ax) cbar.ax.tick_params(labelsize=12) cbar_label_name = r"Daily VSS (10$^{-5}$)" elif z_dim == 'SPA_c': ticks = [0, 1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, ">10"] cbar = fig.colorbar(cset, ax=ax, ticks=np.arange(12)) cbar.ax.tick_params(labelsize=12) cbar_label_name = "SPA (%)" cbar.set_ticklabels(ticks) cbar.set_label(cbar_label_name, labelpad=1, size=18) fig_path = os.path.join(pp.TMP_DIR, "{}_{}.pdf".format(mkt, z_dim)) plt.savefig(fig_path, dpi=240, format='pdf') plt.show()
xsave = str(int(xv)) outanomDir = outPath+ft+'/%(m)s/'+va+'/'+str(pl)+'/daily/anom/' anomfname = 'daily_anomalies.y'+ysave+'.x'+xsave+'.nc' outmmeDir = outPath+ft+'/MME/'+va+'/'+str(pl)+'/daily/anom/' if not os.path.isdir(outmmeDir): os.makedirs(outmmeDir) # Update file names anomfname = starttime+'.'+endtime+'.'+anomfname modellist = ['30LCESM1', '46LCESM1', 'CCSM4', 'FIMr1p1', 'GEFS', 'GEM', 'GEOS_V2p1', 'NESM'] # create an empty multi-model ensemble file made up of # days from startS and endS and leadtime of up to 45 days # Read in one model to get leadtime coords da = xr.open_dataarray(_moddir+anomfname) _dates = pd.date_range(starttime, endtime, freq='D') _L = [ pd.Timedelta(12,'h') + pd.Timedelta(days=i) for i in range(45) ] x = np.empty((len(modellist), len(_dates), len(_L))) x.fill(np.nan) mme_ds = xr.DataArray(x, coords={'X': da.X, 'L': da.L, 'Y': da.Y, 'P': da.P, 'S': _dates, 'model': modellist}, dims=['model', 'S', 'L']) # Populate mme_da for i, model in enumerate(modellist): _moddir = outanomDir % {'m':model} da = xr.open_dataarray(_moddir+anomfname) da = da.mean(dim='M') # Find indices to populate start date idates = np.ones(len(da.S), dtype=np.int16) for j in range(len(idates)):
alpha = xr.open_mfdataset(era_path + "/eff_ws/era5_alpha_NZ_*.nc",
                          chunks={'time': 46})  # .sel(time=slice('1997','2020'))

# load windpark data
windparks = pd.read_csv(nz_path + "/windparks_NZ.csv", delimiter=';',
                        parse_dates=['commissioning'])
# calculate specific power of turbines (capacity per rotor swept area)
windparks['sp'] = windparks.turb_cap * 10**6 / (windparks.d_rotor**2 * np.pi / 4)

# with GWA
outfile = results_path + '/windpower_??_ERA5_GWA.nc'
if results_path + '/windpower_NZ_ERA5_GWA.nc' not in glob.glob(outfile):
    print('calculating ERA5 NZ GWA')
    if GWA == "3":
        GWA = xr.open_rasterio(nz_path + '/GWA/GWA3_NZ100m.tif')
    else:
        GWA = xr.open_dataarray(nz_path + '/GWA/GWA2_NZ100m.nc')
    wps = windpower_simulation_era5(wind.wh100,
                                    alpha.alpha,
                                    windparks.Height.values,
                                    windparks.Capacity.values,
                                    windparks.sp.values,
                                    windparks.Longitude.values,
                                    windparks.Latitude.values,
                                    windparks.commissioning.values,
                                    startyear, GWA, startGWA, endGWA)
    # save as netcdf
    wps.drop(['x', 'y']).to_dataset(name='wp').to_netcdf(
        results_path + "/windpower_NZ_ERA5_GWA.nc")
@numba.njit
def nanmean(v, w):
    return _nanmean(v, w)


filepath = '/net/so4/landclim/bverena/large_files/data_small.nc'

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
print(rank)

# open data
# Every rank reads the whole file. This is probably not optimal; ideally one rank
# would read the file and send each variable to a different rank.
print(f'open data')
data = xr.open_dataarray(filepath)

"""
# subset more for speedup of first tests
print(f'subset even more because very large dataset')
data = data[:, ::10, :, :]
"""

shape = np.shape(data)

# create a mask of nans
mask = ~np.isnan(data)  # nan values have zero weight (i.e. are False)

# gapfill the missing values with the spatiotemporal mean
print('gapfilling missing values with spatiotemporal mean')
tic = datetime.now()
var = rank
result = _nanmean(data.values[var, :, :, :], mask.values[var, :, :, :])
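# Hedged sketch (not part of the original script): the comment above notes that letting
# every rank read the file is wasteful. One alternative, assuming mpi4py and the same
# (variable, time, lat, lon) array layout, is to have rank 0 read the file once and
# broadcast the values; all names besides `filepath` are illustrative.
from mpi4py import MPI
import numpy as np
import xarray as xr

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    data_np = xr.open_dataarray(filepath).values  # only rank 0 touches the disk
else:
    data_np = None
data_np = comm.bcast(data_np, root=0)  # every rank receives the full array (pickled)

# each rank then works on "its" variable slice, as in the original loop
my_var = data_np[rank, :, :, :]
my_mask = ~np.isnan(my_var)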
# da[8, :, :] = da[7, :, :]

# assign the time stamps:
da.coords['time'] = pd.date_range(start='1/1/2017', end='29/12/2019', freq='12D')

# test the bfast4openeo function:
breaks = bfast4openeo(da)

# plot the result:
plt.imshow(breaks.values)

# save the result:
breaks.to_netcdf('offline_bfastPy_output.nc')

# load the results:
breaks = xr.open_dataarray('offline_bfastPy_output.nc')
breaks = breaks.sortby('x')
breaks = breaks.sortby('y')

aoi = breaks.sel(y=slice(9611198, 9611750), x=slice(742740, 743297))
aoi.plot()

# ------------
start_monitor = datetime(2019, 1, 1)
end_monitor = datetime(2019, 12, 31)

# get dates from the monitoring period:
dates = pd.date_range(start='1/1/2017', end='29/12/2019', freq='12D')
dates_mon = dates[dates.slice_indexer(start_monitor, end_monitor)]

# convert to the fraction of the year:
frac_of_year = np.array(dates_mon.year + dates_mon.dayofyear / 365.)

# convert aoi values to the fraction of the year:
object_file = args.objects_file.replace('.nc', '')

if 'objects' not in object_file:
    raise Exception(
        "Expected an objects file, got `{}`".format(args.objects_file)
    )

base_name, objects_mask = object_file.split('.objects.')
out_filename = FN_FORMAT.format(base_name=base_name, objects_name=objects_mask)

fn_objects = "{}.nc".format(object_file)
if not os.path.exists(fn_objects):
    raise Exception("Couldn't find objects file `{}`".format(fn_objects))
objects = xr.open_dataarray(fn_objects, decode_times=False)

ds = main(objects)

ds.attrs['input_name'] = args.objects_file
ds.attrs['mask'] = objects_mask

ds.to_netcdf(out_filename)
print("Wrote output to `{}`".format(out_filename))

if args.make_plot:
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import topology.plots.overview
def symbol_statistics(start_date=dt.date(1990, 1, 1), end_date=dt.date(2017, 12, 31)): """ the statistics of the return of the specified stocks """ import csv import json import statsmodels.tsa.stattools as tsa_tools import scipy.stats as spstats import portfolio_programming.statistics.risk_adjusted as risk_adj import arch.bootstrap.multiple_comparison as arch_comp symbols = json.load(open(os.path.join(pp.DATA_DIR, 'DJIA_symbols_20170901.json'))) data_xarr = xr.open_dataarray(os.path.join(pp.DATA_DIR, 'DJIA_symbols_20170901.nc')) with open(os.path.join(pp.TMP_DIR, 'DJIA_symbols_20170901_stat.csv'), 'w') as csv_file: fields = ["rank", 'symbol', 'start_date', 'end_date', "n_data", "cum_roi", "annual_roi", "roi_mu", "std", "skew", "ex_kurt", "Sharpe", "Sortino", "JB", "worst_ADF", "SPA_c"] writer = csv.DictWriter(csv_file, fieldnames=fields) writer.writeheader() for sdx, symbol in enumerate(symbols): rois = data_xarr.loc[start_date:end_date, symbol, 'simple_roi'] trans_dates = rois.get_index('trans_date') rois = rois.data # to numpy rois = rois[~np.isnan(rois)] # filter the nan n_roi = len(rois) rois[0] = 0 cumulative_roi = float((1 + rois).prod() - 1) annual_roi = float(np.power(cumulative_roi + 1, 1. / 10) - 1) sharpe = risk_adj.Sharpe(rois) sortino = risk_adj.Sortino_full(rois)[0] jb = spstats.jarque_bera(rois)[1] # worse case of adf adf_c = tsa_tools.adfuller(rois, regression='c')[1] adf_ct = tsa_tools.adfuller(rois, regression='ct')[1] adf_ctt = tsa_tools.adfuller(rois, regression='ctt')[1] adf_nc = tsa_tools.adfuller(rois, regression='nc')[1] adf = max(adf_c, adf_ct, adf_ctt, adf_nc) spa_value = 0 for _ in range(5): spa = arch_comp.SPA(rois, np.zeros(n_roi), reps=1000) spa.seed(np.random.randint(0, 2 ** 31 - 1)) spa.compute() # preserve the worse p_value if spa.pvalues[1] > spa_value: spa_value = spa.pvalues[1] writer.writerow({ "rank": sdx + 1, "symbol": symbol, "start_date": trans_dates[0].strftime("%Y-%m-%d"), "end_date": trans_dates[-1].strftime("%Y-%m-%d"), "n_data": n_roi, "cum_roi": cumulative_roi, "annual_roi": annual_roi, "roi_mu": float(rois.mean()), "std": float(rois.std(ddof=1)), "skew": spstats.skew(rois, bias=False), "ex_kurt": spstats.kurtosis(rois, bias=False), "Sharpe": sharpe, "Sortino": sortino, "JB": jb, "worst_ADF": adf, "SPA_c": spa_value, }) print("[{}/{}] {}, cum_roi:{:.2%}".format( sdx + 1, len(symbols), symbol, cumulative_roi))
def choose_block(parameter_dir, varnames, percentile_threshold=97): """Calculate a block size for all variables, months, and locations using the Wilks (1997) JClim formula. Parameters ---------- parameter_dir : str Parent directory for parameter files varnames : list List of (standard) variable names to be considered, i.e. ['tas', 'pr', 'slp'] percentile_threshold : float The percentile of estimated blocks to use universally. Returns ------- block_use : int Suggested block size in years block_use_mo : int Suggested block size in months """ # Initialize with smallest block (in years) block_use = 1 for this_varname in varnames: this_dir = '%s/%s' % (parameter_dir, this_varname) fname = '%s/residual.nc' % this_dir da = xr.open_dataarray(fname) _, nlat, nlon = np.shape(da) has_data = ~np.isnan(da[-1, ...].values) datavec = da.values[:, has_data] # We want to know the extent to which there is year-to-year memory (not seasonal) # Calculate block size for each month, gridbox ntime, nbox = np.shape(datavec) block_est = np.empty((12, nbox)) def rhs(L): return (n - L + 1)**((2 / 3) * (1 - n_eff / n)) for i in range(12): for j in range(nbox): this_ts = datavec[i::12, j] # estimate rho rho = np.corrcoef(this_ts[1:], this_ts[:-1])[0, 1] n = len(this_ts) # Wilks equation is implicit, so need to solve iteratively n_eff = n * (1 - rho) / (1 + rho) # As per Wilks 1997, start with a guess of L = sqrt(n) L = int(np.sqrt(n)) while L > rhs(L): L -= 1 while L < rhs(L): L += 1 if L > rhs(L): L -= 1 block_est[i, j] = L new_block = np.percentile(block_est.flatten(), percentile_threshold) if new_block > block_use: block_use = new_block print('Block size: %d years' % block_use) block_use_mo = block_use * 12 # switch to months return block_use, block_use_mo
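# Hedged usage sketch for choose_block (directory and variable names below are
# illustrative, not taken from the original). The function scans
# <parameter_dir>/<varname>/residual.nc for each variable and, per month and grid box,
# solves Wilks' (1997) implicit relation
#     L = (n - L + 1) ** ((2/3) * (1 - n_eff / n)),  with  n_eff = n * (1 - rho) / (1 + rho),
# then returns the chosen percentile of all estimates in years and in months.
block_years, block_months = choose_block(
    parameter_dir='/path/to/parameters',   # hypothetical parameter directory
    varnames=['tas', 'pr', 'slp'],
    percentile_threshold=97,
)
print('block length: {} years ({} months)'.format(block_years, block_months))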
def grdfilter(grid, **kwargs): """ Filter a grid in the space (or time) domain. Filter a grid file in the time domain using one of the selected convolution or non-convolution isotropic or rectangular filters and compute distances using Cartesian or Spherical geometries. The output grid file can optionally be generated as a sub-region of the input (via *region*) and/or with new increment (via *spacing*) or registration (via *toggle*). In this way, one may have "extra space" in the input data so that the edges will not be used and the output can be within one half-width of the input edges. If the filter is low-pass, then the output may be less frequently sampled than the input. Full option list at :gmt-docs:`grdfilter.html` {aliases} Parameters ---------- grid : str or xarray.DataArray The file name of the input grid or the grid loaded as a DataArray. outgrid : str or None The name of the output netCDF file with extension .nc to store the grid in. filter : str ``xwidth[/width2][modifiers]``. Name of filter type you which to apply, followed by the width b: Box Car; c: Cosine Arch; g: Gaussian; o: Operator; m: Median; p: Maximum Likelihood probability; h: histogram Example: F='m600' for a median filter with width of 600 distance : str Distance *flag* tells how grid (x,y) relates to filter width as follows: p: grid (px,py) with *width* an odd number of pixels; Cartesian distances. 0: grid (x,y) same units as *width*, Cartesian distances. 1: grid (x,y) in degrees, *width* in kilometers, Cartesian distances. 2: grid (x,y) in degrees, *width* in km, dx scaled by cos(middle y), Cartesian distances. The above options are fastest because they allow weight matrix to be computed only once. The next three options are slower because they recompute weights for each latitude. 3: grid (x,y) in degrees, *width* in km, dx scaled by cosine(y), Cartesian distance calculation. 4: grid (x,y) in degrees, *width* in km, Spherical distance calculation. 5: grid (x,y) in Mercator ``projection='m1'`` img units, *width* in km, Spherical distance calculation. spacing : str ``xinc[+e|n][/yinc[+e|n]]``. x_inc [and optionally y_inc] is the grid spacing. nans : str or float ``i|p|r``. Determine how NaN-values in the input grid affects the filtered output. {R} toggle : bool Toggle the node registration for the output grid so as to become the opposite of the input grid. [Default gives the same registration as the input grid]. {V} Returns ------- ret: xarray.DataArray or None Return type depends on whether the *outgrid* parameter is set: - xarray.DataArray if *outgrid* is not set - None if *outgrid* is set (grid output will be stored in *outgrid*) Examples -------- >>> import os >>> import pygmt >>> # Apply a filter of 600km (full width) to the @earth_relief_30m file >>> # and return a filtered field (saved as netcdf) >>> pygmt.grdfilter( ... grid="@earth_relief_30m", ... filter="m600", ... distance="4", ... region=[150, 250, 10, 40], ... spacing=0.5, ... outgrid="filtered_pacific.nc", ... ) >>> os.remove("filtered_pacific.nc") # cleanup file >>> # Apply a gaussian smoothing filter of 600 km in the input data array, >>> # and returns a filtered data array with the smoothed field. 
>>> grid = pygmt.datasets.load_earth_relief() >>> smooth_field = pygmt.grdfilter(grid=grid, filter="g600", distance="4") """ kind = data_kind(grid) with GMTTempFile(suffix=".nc") as tmpfile: with Session() as lib: if kind == "file": file_context = dummy_context(grid) elif kind == "grid": file_context = lib.virtualfile_from_grid(grid) else: raise GMTInvalidInput("Unrecognized data type: {}".format(type(grid))) with file_context as infile: if "G" not in kwargs.keys(): # if outgrid is unset, output to tempfile kwargs.update({"G": tmpfile.name}) outgrid = kwargs["G"] arg_str = " ".join([infile, build_arg_string(kwargs)]) lib.call_module("grdfilter", arg_str) if outgrid == tmpfile.name: # if user did not set outgrid, return DataArray with xr.open_dataarray(outgrid) as dataarray: result = dataarray.load() _ = result.gmt # load GMTDataArray accessor information else: result = None # if user sets an outgrid, return None return result
def setUp(self):
    state_path = os.path.join(DATA_PATH, 'test_state.nc')
    self.state = xr.open_dataarray(state_path)
    obs_path = os.path.join(DATA_PATH, 'test_single_obs.nc')
    self.obs = xr.open_dataset(obs_path)
    self.operator = BernoulliOperator()
def grdcut(grid, **kwargs): """ Extract subregion from a grid. Produce a new *outgrid* file which is a subregion of *grid*. The subregion is specified with *region*; the specified range must not exceed the range of *grid* (but see *extend*). If in doubt, run :meth:`pygmt.grdinfo` to check range. Alternatively, define the subregion indirectly via a range check on the node values or via distances from a given point. Finally, you can give *projection* for oblique projections to determine the corresponding rectangular *region* setting that will give a grid that fully covers the oblique domain. Full option list at :gmt-docs:`grdcut.html` {aliases} Parameters ---------- grid : str or xarray.DataArray The file name of the input grid or the grid loaded as a DataArray. outgrid : str or None The name of the output netCDF file with extension .nc to store the grid in. {J} {R} extend : bool or int or float Allow grid to be extended if new *region* exceeds existing boundaries. Give a value to initialize nodes outside current region. circ_subregion : str ``'lon/lat/radius[unit][+n]'``. Specify an origin (*lon* and *lat*) and *radius*; append a distance *unit* and we determine the corresponding rectangular region so that all grid nodes on or inside the circle are contained in the subset. If **+n** is appended we set all nodes outside the circle to NaN. z_subregion : str ``'[min/max][+n|N|r]'``. Determine a new rectangular region so that all nodes outside this region are also outside the given z-range [-inf/+inf]. To indicate no limit on *min* or *max* only, specify a hyphen (-). Normally, any NaNs encountered are simply skipped and not considered in the range-decision. Append **+n** to consider a NaN to be outside the given z-range. This means the new subset will be NaN-free. Alternatively, append **+r** to consider NaNs to be within the data range. In this case we stop shrinking the boundaries once a NaN is found [Default simply skips NaNs when making the range decision]. Finally, if your core subset grid is surrounded by rows and/or columns that are all NaNs, append **+N** to strip off such columns before (optionally) considering the range of the core subset for further reduction of the area. {V} Returns ------- ret: xarray.DataArray or None Return type depends on whether the *outgrid* parameter is set: - xarray.DataArray if *outgrid* is not set - None if *outgrid* is set (grid output will be stored in *outgrid*) """ kind = data_kind(grid) with GMTTempFile(suffix=".nc") as tmpfile: with Session() as lib: if kind == "file": file_context = dummy_context(grid) elif kind == "grid": file_context = lib.virtualfile_from_grid(grid) else: raise GMTInvalidInput("Unrecognized data type: {}".format(type(grid))) with file_context as infile: if "G" not in kwargs.keys(): # if outgrid is unset, output to tempfile kwargs.update({"G": tmpfile.name}) outgrid = kwargs["G"] arg_str = " ".join([infile, build_arg_string(kwargs)]) lib.call_module("grdcut", arg_str) if outgrid == tmpfile.name: # if user did not set outgrid, return DataArray with xr.open_dataarray(outgrid) as dataarray: result = dataarray.load() _ = result.gmt # load GMTDataArray accessor information else: result = None # if user sets an outgrid, return None return result
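# Hedged usage sketch (not part of the module): grdcut mirrors grdfilter's return
# behaviour -- without `outgrid` it returns an xarray.DataArray, with `outgrid` set it
# writes the netCDF file and returns None. The region values are illustrative only.
import os

import pygmt

subset = pygmt.grdcut("@earth_relief_01d", region=[-10, 10, -5, 5])
print(subset.shape)

pygmt.grdcut("@earth_relief_01d", region=[-10, 10, -5, 5], outgrid="subset.nc")
os.remove("subset.nc")  # cleanup file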
def stocksp_cor15_plot_2d_contour_by_alpha(setting, z_dim="cum_roi"): """ The 2 x 5 contour diagrams in the paper are generated by the function """ # verify setting if setting not in ("compact", "general"): raise ValueError("unknown setting: {}".format(setting)) # verify z_dim if z_dim not in ('cum_roi', 'daily_VSS', 'annual_roi'): raise ValueError('unknown z_dim:{}'.format(z_dim)) # parameters start_date, end_date = dt.date(2005, 1, 3), dt.date(2014, 12, 31) max_portfolio_sizes = range(5, 50 + 5, 5) window_sizes = range(60, 240 + 10, 10) alphas = ["{:.2f}".format(v / 100.) for v in range(50, 100, 5)] set_indices = [1, 2, 3] name = "report_SPSP_CVaR_whole_{}_{}_{}.nc".format( setting, start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")) # read report file xarr = xr.open_dataarray(open(os.path.join(pp.DATA_DIR, name), 'rb')) import matplotlib as mpl import matplotlib.pyplot as plt # figure size in inches fig = plt.figure(figsize=(64, 48), facecolor='white') # set color range if z_dim == 'cum_roi': cm_norm = mpl.colors.Normalize(vmin=-100, vmax=300, clip=False) color_range = np.arange(-100, 300 + 10, 20) elif z_dim == "daily_VSS": cm_norm = mpl.colors.Normalize(vmin=0, vmax=4, clip=False) color_range = np.arange(0, 4 + 0.2, 0.3) elif z_dim == 'annual_roi': pass xlim = (5, 50) ylim = (60, 240) for adx, alpha in enumerate(alphas): # x-axis, max_portfolio_size, y-axis: window_sizes ax = fig.add_subplot(2, 5, adx + 1, xlim=xlim, ylim=ylim) ax.set_title(r'$\alpha$ = {:.0%}'.format(float(alpha)), y=1.02, fontsize=18) # labelpad - number of points between the axis and its label ax.set_xlabel(r'$M$', fontsize=14, labelpad=-2, fontname="Times New Roman") ax.set_ylabel(r'$h$', fontsize=14, labelpad=-2, fontname="Times New Roman") ax.tick_params(labelsize=10, pad=1) ax.set_xticks(max_portfolio_sizes) ax.set_xticklabels(max_portfolio_sizes, fontsize=10, fontname="Times New Roman") ax.set_yticks(window_sizes) ax.set_yticklabels(window_sizes, fontsize=10, fontname="Times New Roman") Xs, Ys = np.meshgrid(max_portfolio_sizes, window_sizes) Zs = np.zeros_like(Xs, dtype=np.float) n_row, n_col = Xs.shape for rdx in range(n_row): for cdx in range(n_col): n_symbol, win_size = Xs[rdx, cdx], Ys[rdx, cdx] z_values = xarr.loc[ "{}_{}".format(start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")), set_indices, n_symbol, win_size, alpha, z_dim] mean = z_values.mean() Zs[rdx, cdx] = float(mean) * 100. print("Z_dim:", z_dim) print("z_range:", np.min(Zs), np.max(Zs)) print(Zs) # contour, projecting on z cset = ax.contourf(Xs, Ys, Zs, cmap=plt.cm.coolwarm, norm=cm_norm, levels=color_range) # share color bar, rect [left, bottom, width, height] cbar_ax = fig.add_axes([0.92, 0.125, 0.015, 0.75]) # print fig.get_axes() cbar = fig.colorbar(cset, ax=fig.get_axes(), cax=cbar_ax, ticks=color_range) cbar.ax.tick_params(labelsize=12) if z_dim == "cum_roi": cbar_label_name = "Average cumulative returns (%)" elif z_dim == "daily_VSS": cbar_label_name = "Average daily VSS (%)" cbar.set_label(cbar_label_name, labelpad=1, size=20, fontname="Times New Roman") plt.show()
for n, i in enumerate(tqdm.tqdm(idxs_window)):
    obj_mask = da_.where(da_ == i, other=0)
    y_mean = obj_mask.yt.where(obj_mask).mean()
    d = (y_mean - y_min) / (y_max - y_min)
    obj_mask.max(dim="yt").plot.contour(
        y="zt", ax=ax, add_colorbar=False, levels=[0.5], alpha=d
    )

ax.set_aspect(1)


if __name__ == "__main__":
    import argparse

    argparser = argparse.ArgumentParser(__doc__)
    argparser.add_argument("objects_filename")
    argparser.add_argument("--frac", default=0.9, type=float)
    argparser.add_argument("--lx", default=10e3, type=float)
    args = argparser.parse_args()

    da = xr.open_dataarray(args.objects_filename, decode_times=False)
    plot_outline(da=da, lx=args.lx, frac=args.frac)

    fn_out = args.objects_filename.replace(".nc", ".outlines.png")
    plt.savefig(fn_out)
    print("Saved plot to {}".format(fn_out))
def make_pwqd_TEMP_files(self): """ quadratically detrends annually averaged TEMP field at each point for selected 250 year segments of CTRL or LPD simulations pwqd : `point wise quadratically detrended` """ if self.run == 'ctrl': path = f'{path_prace}/ctrl_rect' interp = '.interp900x602' mf_fn = f'{path}/TEMP_PD_yrly_*.interp900x602.nc' trange = np.arange(50, 300) km = 42 z = 'depth_t' elif self.run == 'lpd': path = f'{path_prace}/lpd' interp = '' mf_fn = f'{path}/ocn_yrly_TEMP_PD_*.nc' trange = np.arange(0, 250) km = 60 z = 'z_t' # concatenate yearly files yrly_TEMP_file = f'{path}/TEMP_yrly{interp}.nc' try: # assert 1==0 assert os.path.exists(yrly_TEMP_file) except: print('making yrly TEMP file') da = xr.open_mfdataset(mf_fn, concat_dim='time').TEMP da = da.isel(time=trange) da.assign_coords(time=da.time.values).to_netcdf(yrly_TEMP_file) da.close() # calculating detrended TEMP field for each vertical level b/c of memory limitations for k in tqdm(range(km)): fn = f'{path}/TEMP_yrly_pwqd_{k:02d}{interp}.nc' try: # assert 1==0 assert os.path.exists(fn) except: da_k = xr.open_dataarray(yrly_TEMP_file, decode_times=False).isel({z: k}) da_pwqd_k = da_k - xr_quadtrend(da_k) da_pwqd_k.to_netcdf(fn) da_pwqd_k.close() # concatenating print(f'{path}/TEMP_yrly_pwqd_*{interp}.nc') da_pwqd = xr.open_mfdataset(f'{path}/TEMP_yrly_pwqd_*{interp}.nc', concat_dim=['depth_t'], chunks={'time': 1}) if self.run == 'ctrl': da_pwqd = da_pwqd.assign_coords(time=np.arange(51, 301)) elif self.run == 'lpd': da_pwqd = da_pwqd.assign_coords(time=np.arange(154, 404)) # da = xr.open_dataarray(yrly_TEMP_file, decode_times=False) # da_pwqd = da - xr_quadtrend(da) # writing out files for individual years print(da_pwqd.time) for i, y in tqdm(enumerate(da_pwqd.time)): # 9 mins for ctrl da_pwqd.isel(time=i).to_netcdf( f'{path}/TEMP_pwqd_yrly_{int(y.values):04d}{interp}.nc') return
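# Hedged sketch of the point-wise quadratic detrending step used above. `xr_quadtrend`
# is a project-specific helper; this only illustrates the same idea with xarray's
# built-in polyfit/polyval (xarray >= 0.16 assumed) on a generic DataArray with a
# 'time' dimension.
import xarray as xr


def quad_detrend(da, dim='time'):
    """Remove a per-gridpoint quadratic fit along `dim`."""
    coeffs = da.polyfit(dim=dim, deg=2).polyfit_coefficients
    trend = xr.polyval(da[dim], coeffs)
    return da - trend

# e.g. da_pwqd = quad_detrend(xr.open_dataarray(yrly_TEMP_file, decode_times=False))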
def load_earth_relief(resolution="01d", region=None, registration=None): """ Load Earth relief grids (topography and bathymetry) in various resolutions. The grids are downloaded to a user data directory (usually ``~/.gmt/server/earth/earth_relief/``) the first time you invoke this function. Afterwards, it will load the grid from the data directory. So you'll need an internet connection the first time around. These grids can also be accessed by passing in the file name ``'@earth_relief_rru[_reg]'`` to any grid plotting/processing function. Refer to :gmt-docs:`datasets/remote-data.html` for more details. Parameters ---------- resolution : str The grid resolution. The suffix ``d``, ``m`` and ``s`` stand for arc-degree, arc-minute and arc-second. It can be ``'01d'``, ``'30m'``, ``'20m'``, ``'15m'``, ``'10m'``, ``'06m'``, ``'05m'``, ``'04m'``, ``'03m'``, ``'02m'``, ``'01m'``, ``'30s'``, ``'15s'``, ``'03s'``, or ``'01s'``. region : str or list The subregion of the grid to load. Required for Earth relief grids with resolutions <= 05m. registration : str Grid registration type. Either ``pixel`` for pixel registration or ``gridline`` for gridline registration. Default is ``None``, where a pixel-registered grid is returned unless only the gridline-registered grid is available. Returns ------- grid : xarray.DataArray The Earth relief grid. Coordinates are latitude and longitude in degrees. Relief is in meters. Notes ----- The DataArray doesn's support slice operation, for Earth relief data with resolutions higher than "05m", which are stored as smaller tiles. Examples -------- >>> # load the default grid (pixel-registered 01d grid) >>> grid = load_earth_relief() >>> # load the 30m grid with "gridline" registration >>> grid = load_earth_relief("30m", registration="gridline") >>> # load high-resolution grid for a specific region >>> grid = load_earth_relief( ... "05m", region=[120, 160, 30, 60], registration="gridline" ... ) """ # earth relief data stored as single grids for low resolutions non_tiled_resolutions = ["01d", "30m", "20m", "15m", "10m", "06m"] # earth relief data stored as tiles for high resolutions tiled_resolutions = [ "05m", "04m", "03m", "02m", "01m", "30s", "15s", "03s", "01s" ] if registration in ("pixel", "gridline", None): # If None, let GMT decide on Pixel/Gridline type reg = f"_{registration[0]}" if registration else "" else: raise GMTInvalidInput( f"Invalid grid registration: {registration}, should be either " "'pixel', 'gridline' or None. Default is None, where a " "pixel-registered grid is returned unless only the " "gridline-registered grid is available.") # different ways to load tiled and non-tiled earth relief data if resolution in non_tiled_resolutions: if region is not None: raise NotImplementedError( f"'region' is not supported for Earth relief resolution '{resolution}'" ) fname = which(f"@earth_relief_{resolution}{reg}", download="a") with xr.open_dataarray(fname) as dataarray: grid = dataarray.load() _ = grid.gmt # load GMTDataArray accessor information elif resolution in tiled_resolutions: # Titled grid can't be sliced. 
# See https://github.com/GenericMappingTools/pygmt/issues/524 if region is None: raise GMTInvalidInput( f"'region' is required for Earth relief resolution '{resolution}'" ) grid = grdcut(f"@earth_relief_{resolution}{reg}", region=region) else: raise GMTInvalidInput( f'Invalid Earth relief resolution "{resolution}"') # Add some metadata to the grid grid.name = "elevation" grid.attrs["long_name"] = "elevation relative to the geoid" grid.attrs["units"] = "meters" grid.attrs["vertical_datum"] = "EMG96" grid.attrs["horizontal_datum"] = "WGS84" # Remove the actual range because it gets outdated when indexing the grid, # which causes problems when exporting it to netCDF for usage on the # command-line. grid.attrs.pop("actual_range") for coord in grid.coords: grid[coord].attrs.pop("actual_range") return grid
def plot_yearly_2d_contour_by_alpha(setting, z_dim="cum_roi"): # verify setting if setting not in ("compact", "general"): raise ValueError("unknown setting: {}".format(setting)) start_date, end_date = dt.date(2005, 1, 3), dt.date(2017, 12, 29) name = "report_SPSP_CVaR_yearly_{}_{}_{}.nc".format( setting, start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")) # yearly interval years = [[dt.date(2005, 1, 3), dt.date(2005, 12, 30)], [dt.date(2006, 1, 2), dt.date(2006, 12, 29)], [dt.date(2007, 1, 2), dt.date(2007, 12, 31)], [dt.date(2008, 1, 2), dt.date(2008, 12, 31)], [dt.date(2009, 1, 5), dt.date(2009, 12, 31)], [dt.date(2010, 1, 4), dt.date(2010, 12, 31)], [dt.date(2011, 1, 3), dt.date(2011, 12, 30)], [dt.date(2012, 1, 2), dt.date(2012, 12, 28)], [dt.date(2013, 1, 2), dt.date(2013, 12, 31)], [dt.date(2014, 1, 2), dt.date(2014, 12, 31)], [dt.date(2015, 1, 5), dt.date(2015, 12, 31)], [dt.date(2016, 1, 4), dt.date(2016, 12, 30)], [dt.date(2017, 1, 3), dt.date(2017, 12, 29)]] # read report file xarr = xr.open_dataarray(open(os.path.join(pp.DATA_DIR, name), 'rb')) # parameters max_portfolio_sizes = range(5, 50 + 5, 5) window_sizes = range(60, 240 + 10, 10) alphas = ["{:.2f}".format(v / 100.) for v in range(50, 100, 5)] set_indices = [1, 2, 3] import matplotlib as mpl import matplotlib.pyplot as plt for start, end in years: # figure size in inches fig = plt.figure(figsize=(64, 48), facecolor='white') fig.suptitle( 'TAIEX_20050103_50largest_listed_market_cap {} {}-{}'.format( setting, start.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d")), fontsize=20) xlim = (5, 50) ylim = (60, 240) for adx, alpha in enumerate(alphas): # x-axis, max_portfolio_size, y-axis: window_sizes ax = fig.add_subplot(2, 5, adx + 1, xlim=xlim, ylim=ylim) ax.set_title(r'$\alpha$ = {:.0%}'.format(float(alpha)), y=1.02, fontsize=18) # labelpad - number of points between the axis and its label ax.set_xlabel(r'$M$', fontsize=14, labelpad=-2, fontname="Times New Roman") ax.set_ylabel(r'$h$', fontsize=14, labelpad=-2, fontname="Times New Roman") ax.tick_params(labelsize=10, pad=1) ax.set_xticks(max_portfolio_sizes) ax.set_xticklabels(max_portfolio_sizes, fontsize=10, fontname="Times New Roman") ax.set_yticks(window_sizes) ax.set_yticklabels(window_sizes, fontsize=10, fontname="Times New Roman") Xs, Ys = np.meshgrid(max_portfolio_sizes, window_sizes) Zs = np.zeros_like(Xs, dtype=np.float) n_row, n_col = Xs.shape for rdx in range(n_row): for cdx in range(n_col): n_symbol, win_size = Xs[rdx, cdx], Ys[rdx, cdx] z_values = xarr.loc["{}_{}".format( start.strftime("%Y%m%d"), end.strftime("%Y%m%d")), set_indices, n_symbol, win_size, alpha, z_dim] mean = z_values.mean() Zs[rdx, cdx] = float(mean) * 100. 
# if Zs[rdx, cdx] > 10: # Zs[rdx, cdx] = 10.5 print("Z_dim:", z_dim) print("z_range:", np.min(Zs), np.max(Zs)) z_min = int(np.floor(np.min(Zs))) z_max = int(np.ceil(np.max(Zs))) # set color range if z_dim == 'cum_roi': cm_norm = mpl.colors.Normalize(vmin=z_min, vmax=z_max, clip=False) color_range = np.arange(z_min, z_max + 1) # contour, projecting on z cset = ax.contourf(Xs, Ys, Zs, cmap=plt.cm.coolwarm, norm=cm_norm, levels=color_range) # share color bar, rect [left, bottom, width, height] cbar_ax = fig.add_axes([0.92, 0.125, 0.015, 0.75]) # print fig.get_axes() cbar = fig.colorbar(cset, ax=fig.get_axes(), cax=cbar_ax, ticks=color_range) cbar.ax.tick_params(labelsize=12) if z_dim == "cum_roi": cbar_label_name = "Average cumulative returns (%)" elif z_dim == "daily_VSS": cbar_label_name = "Average daily VSS (%)" cbar.set_label(cbar_label_name, labelpad=1, size=20, fontname="Times New Roman") fig_path = os.path.join( pp.TMP_DIR, 'SPSP_CVaR_cum_roi_yearly_{}_{}.png'.format(setting, start.year)) fig.set_size_inches(16, 9) plt.savefig(fig_path, dpi=240, format='png') plt.show()
import xarray as xr import numpy as np import matplotlib from faceted import faceted from matplotlib import ticker matplotlib.rcParams['mathtext.fontset'] = 'cm' import matplotlib.pyplot as plt from matplotlib.lines import Line2D # matplotlib.rc('text', usetex=True) from lighten_color import lighten_color lam_ds = xr.open_mfdataset( '/work/bnm/buoyant_entrainment/data/lam/rho_u_v_w/slice*.nc', concat_dim='t') lam_omega = xr.open_dataarray( '/work/bnm/buoyant_entrainment/data/lam/vort_phi/azi_lam_vort.nc') lam_mask = xr.open_dataarray( '/work/bnm/buoyant_entrainment/data/lam/mask/laminar_mask.nc', engine='scipy') lam_circ = xr.open_dataarray( '/work/bnm/buoyant_entrainment/data/lam/mask/circ.nc') lam_azi_omega = xr.open_mfdataset( '/work/bnm/buoyant_entrainment/data/lam/azi_vort_phi/lam*.nc', concat_dim='t').omega_phi turb_ds = xr.open_mfdataset( '/work/bnm/buoyant_entrainment/data/turb/rho_u_v_w/slice*.nc', concat_dim='t') turb_omega = xr.open_mfdataset( '/work/bnm/buoyant_entrainment/data/turb/vort_phi/turb*.nc', concat_dim='t').omega_phi
import numpy as np import pdb fpath = '/localscratch/wllf030/cornkle/obs_data/blob_maps_MSG/' file = fpath + 'blob_map_90km_sum_18UTC.nc' file2 = fpath + 'blob_map_30km_sum_18UTC.nc' file3 = fpath + 'blob_map_90km_sum_3UTC.nc' file4 = fpath + 'blob_map_30km_sum_3UTC.nc' tpath = '/users/global/cornkle/data/pythonWorkspace/proj_CEH/topo/gtopo_1min_afr.nc' spath = '/users/global/cornkle/C_paper/wavelet/figs/paper/' diff30 = fpath + 'blob_map_30km_18-3UTRC_diff.nc' diff90 = fpath + 'blob_map_90km_18-3UTRC_diff.nc' ds = xr.open_dataarray(file) top = xr.open_dataarray(tpath) ds2 = xr.open_dataarray(file2) ds3 = xr.open_dataarray(file3) ds4 = xr.open_dataarray(file4) d30diff = xr.open_dataarray(diff30) d90diff = xr.open_dataarray(diff90) ds.name = '100k' ds2.name = '30k' ds = ds.sel(lon=slice(-17.5, 20), lat=slice(4.5, 20)) # lake chad lon=slice(10,20), lat=slice(10,15) ds2 = ds2.sel(lon=slice(-17.5, 20), lat=slice(4.5, 20)) # volta lon=slice(-10,8), lat=slice(4,10)
def composite(h): pool = multiprocessing.Pool(processes=1) file = constants.MCS_POINTS_DOM hour = h msg = xr.open_dataarray(file) msg = msg[(msg['time.hour'] == h) & (msg['time.minute'] == 0) & (msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6)] msg = msg.sel(lat=slice(10.2, 17), lon=slice(-9.5, 9.5)) res = pool.map(file_loop, msg) pool.close() # for m in msg[0:50]: # file_loop(m) # # return res = [x for x in res if x is not None] blobs = [] scales = [] sign = [] signt = [] for r in res: blobs.append(r[0]) scales.append(r[1]) sign.append(r[2]) blobs = [item for sublist in blobs for item in sublist] # flatten list of lists scales = [item for sublist in scales for item in sublist] # flatten list of lists sign = [item for sublist in sign for item in sublist] # flatten list of lists blobs = np.array(blobs, dtype=float) blobs = blobs[np.isfinite(blobs)] scales = np.array(scales, dtype=float) scales = scales[np.isfinite(scales)] print(np.unique(blobs), len(np.unique(blobs))) weight_blobs = np.ones_like(blobs) / float(len(blobs)) weight_scales = np.ones_like(scales) / float(len(scales)) histb, hb = np.histogram(blobs, bins=np.arange(-200, 201, 20), weights=weight_blobs) hists, hs = np.histogram(scales, bins=np.arange(-200, 201, 20), weights=weight_scales) histbc, hb = np.histogram(blobs, bins=np.arange(-200, 201, 20)) histsc, hs = np.histogram(scales, bins=np.arange(-200, 201, 20)) print('Number of blobs:', blobs.size) # f = plt.figure() # plt.bar(hb[0:-1], histb, align='edge', width=hb[1::]-hb[0:-1],edgecolor='k') # # f = plt.figure() # plt.bar(hs[0:-1], hists, align='edge', width=hb[1::]-hb[0:-1],edgecolor='k') # f = plt.figure() # plt.bar(hs[0:-1], histb-hists, align='edge', width=hb[1::]-hb[0:-1],edgecolor='k') return histb, hists, hb, blobs.size, histbc, histsc