#Assumed module-level imports for the routines below (not shown in the original listing). Helper
# functions such as date_seq, format_dates, get_lat_lon, get_lat_lon_sfc, get_lat_lon_rt52,
# get_pressure, get_lat_lon_inds, reform_terrain, get_dp and drop_duplicates are defined
# elsewhere in this module.
import glob
import warnings
import datetime as dt
import numpy as np
import pandas as pd
import xarray as xr
import netCDF4 as nc
import metpy.calc as mpcalc
from metpy.units import units

def read_erai_fc(domain,times):
	#Open ERA-Interim forecast netcdf files and extract the variables needed for a range of times
	# and a given spatial domain
	#A single time can also be used (include the hour)
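	#Arguments: domain = [lat_min, lat_max, lon_min, lon_max] in degrees; times = [start, end]
	# as datetime objects. The same conventions apply to the other domain-based read_* routines below.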

	ref = dt.datetime(1900,1,1,0,0,0)
	if len(times) > 1:
		date_list = date_seq(times,"hours",6)
	else:
		date_list = times

	#If the last date in the list is the start of the next month, don't include in date stamp
	# list for file names
	if (date_list[-1].day==1) & (date_list[-1].hour==0):
		formatted_dates = [format_dates(x) for x in date_list[0:-1]]
	else:
		formatted_dates = [format_dates(x) for x in date_list]
	unique_dates = np.unique(formatted_dates)

	time_hours = np.empty(len(date_list))
	for t in np.arange(0,len(date_list)):
		time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)

	#Get time-invariant pressure and spatial info
	lon,lat = get_lat_lon()
	lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
	lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
	lon = lon[lon_ind]
	lat = lat[lat_ind]
	terrain = reform_terrain(lon,lat)

	#Initialise arrays for each variable
	wg10 = np.empty((0,len(lat_ind),len(lon_ind)))
	cape = np.empty((0,len(lat_ind),len(lon_ind)))

	for date in unique_dates:
		#print(date)

		#Load ERA-Interim forecast files
		wg10_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/wg10/\
wg10_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])
		cape_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/cape/\
cape_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])

		#Get times to load in from file
		times = wg10_file["time"][:]
		time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]

		#Load data
		wg10 = np.append(wg10,wg10_file["wg10"][time_ind,lat_ind,lon_ind],axis=0)
		cape = np.append(cape,cape_file["cape"][time_ind,lat_ind,lon_ind],axis=0)

		wg10_file.close();cape_file.close()
	
	return [wg10,cape,lon,lat,date_list]
def file_dates(files, query):
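    #Return a boolean list flagging, for each BARPA file path, whether the file's forecast window
    # (base time + 6 h through base time + 10 days, 6-hourly) contains any of the query times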

    is_in = []
    for i in np.arange(len(files)):
        t = dt.datetime.strptime(files[i].split("/")[11][:-1], "%Y%m%dT%H%M")
        t_list = date_seq(
            [t + dt.timedelta(hours=6), t + dt.timedelta(days=10)], "hours", 6)
        if any(np.in1d(query, t_list)):
            is_in.append(True)
        else:
            is_in.append(False)
    return is_in
def read_merra2(domain,times,pres=True,delta_t=1):
	#Read MERRA2 3-hourly pressure level data and hourly surface data for the given domain and times

	if len(times) > 1:
		date_list = date_seq(times,"hours",delta_t)
	else:
		date_list = times

	files_3d = []; files_2d = []
	for d in date_list:
		files_3d.append(glob.glob("/g/data/rr7/MERRA2/raw/M2I3NPASM.5.12.4/"+d.strftime("%Y")+"/"+d.strftime("%m")+"/MERRA2*"+d.strftime("%Y%m%d")+"*.nc4")[0])
		files_2d.append(glob.glob("/g/data/ua8/MERRA2/1hr/M2I1NXASM.5.12.4/"+d.strftime("%Y")+"/"+d.strftime("%m")+"/MERRA2*"+d.strftime("%Y%m%d")+"*.nc4")[0])
	files_3d = np.unique(files_3d)
	files_2d = np.unique(files_2d)

	f3d = xr.open_mfdataset(files_3d, combine="by_coords").sel({"time":date_list, "lev":slice(1000,100), "lon":slice(domain[2], domain[3]), "lat":slice(domain[0], domain[1])})
	f2d = xr.open_mfdataset(files_2d, combine="by_coords").sel({"time":date_list, "lon":slice(domain[2], domain[3]), "lat":slice(domain[0], domain[1])})

	ta_file = f3d["T"]; z_file = f3d["H"]; ua_file = f3d["U"]; va_file = f3d["V"]; hur_file = f3d["RH"]
	uas_file = f2d["U10M"]; vas_file = f2d["V10M"]; hus_file = f2d["QV2M"]; tas_file = f2d["T2M"]; ps_file = f2d["PS"]

	ta = ta_file.values - 273.15
	ua = ua_file.values
	va = va_file.values
	hgt = z_file.values
	hur = hur_file.values * 100
	hur[hur<0] = 0
	hur[hur>100] = 100
	dp = get_dp(ta,hur)
	uas = uas_file.values
	vas = vas_file.values
	tas = tas_file.values - 273.15
	ps = ps_file.values / 100
	ta2d = np.array(mpcalc.dewpoint_from_specific_humidity(hus_file.values, tas*units.degC, \
                    ps*units.hectopascal))
	terrain = f3d["PHIS"].isel({"time":0}).values / 9.8 
	lon = f2d["lon"].values
	lat = f2d["lat"].values
	p = f3d["lev"].values

	return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,lon,lat,date_list]
def read_erai_points(points,times):

	#Open ERA-Interim netcdf files and extract variables needed for a range of 
	# times at a given set of spatial points

	#Format dates and times
	ref = dt.datetime(1900,1,1,0,0,0)
	date_list = date_seq(times,"hours",6)
	formatted_dates = [format_dates(x) for x in date_list]
	unique_dates = np.unique(formatted_dates)
	time_hours = np.empty(len(date_list))
	for t in np.arange(0,len(date_list)):
		time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)

	#Get time-invariant pressure and spatial info
	no_p, pres, p_ind = get_pressure(100)
	lon,lat = get_lat_lon()
	[lon_ind, lat_ind, lon_used, lat_used] = get_lat_lon_inds(points,lon,lat)
	terrain_new = reform_terrain(lon,lat)

	#Initialise arrays for each variable
	ta = np.empty((len(date_list),no_p,len(points)))
	dp = np.empty((len(date_list),no_p,len(points)))
	hur = np.empty((len(date_list),no_p,len(points)))
	hgt = np.empty((len(date_list),no_p,len(points)))
	p = np.empty((len(date_list),no_p,len(points)))
	ua = np.empty((len(date_list),no_p,len(points)))
	va = np.empty((len(date_list),no_p,len(points)))
	uas = np.empty((len(date_list),len(points)))
	vas = np.empty((len(date_list),len(points)))
	ps = np.empty((len(date_list),len(points)))

	for date in unique_dates:
		print(date)

		#Load ERA-Interim reanalysis files
		ta_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/ta/\
ta_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		z_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/z/\
z_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		ua_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/ua/\
ua_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		va_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/va/\
va_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		hur_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/hur/\
hur_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		uas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/uas/\
uas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
		vas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/vas/\
vas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
		ps_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/ps/\
ps_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])

		#Get times to load in from file
		times = ta_file["time"][:]
		time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
		date_ind = np.where(np.array(formatted_dates) == date)[0]

		#Load data for each spatial point given to function
		for point in np.arange(0,len(points)):
			ta[date_ind,:,point] = ta_file["ta"][time_ind,p_ind,lat_ind[point]\
						,lon_ind[point]] - 273.15
			ua[date_ind,:,point] = ua_file["ua"][time_ind,p_ind,lat_ind[point]\
						,lon_ind[point]]
			va[date_ind,:,point] = va_file["va"][time_ind,p_ind,lat_ind[point]\
						,lon_ind[point]]
			hgt[date_ind,:,point] = z_file["z"][time_ind,p_ind,lat_ind[point]\
						,lon_ind[point]] / 9.8
			hur[date_ind,:,point] = hur_file["hur"][time_ind,p_ind,lat_ind[point]\
						,lon_ind[point]]
			hur[hur<0] = 0
			dp[date_ind,:,point] = get_dp(ta[date_ind,:,point],hur[date_ind,:,point])
			uas[date_ind,point] = uas_file["uas"][time_ind,lat_ind[point]\
						,lon_ind[point]]
			vas[date_ind,point] = vas_file["vas"][time_ind,lat_ind[point]\
						,lon_ind[point]]
			ps[date_ind,point] = ps_file["ps"][time_ind,lat_ind[point]\
						,lon_ind[point]] /100
			p[date_ind,:,point] = pres[p_ind]

		ta_file.close();z_file.close();ua_file.close();va_file.close();hur_file.close();uas_file.close();vas_file.close();ps_file.close()

	#Save lat/lon as array
	lon = np.empty((len(points)))
	lat = np.empty((len(points)))
	terrain = np.empty((len(points)))
	for point in np.arange(0,len(points)):
		lon[point] = points[point][0]
		lat[point] = points[point][1]
		terrain[point] = terrain_new[lat_ind[point],lon_ind[point]]
	
	return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,lon,lat,lon_used,lat_used,date_list]
def read_erai(domain,times):
	#Open ERA-Interim netcdf files and extract variables needed for a range of times 
	# and given spatial domain
	#A single time can also be used (include the hour)

	ref = dt.datetime(1900,1,1,0,0,0)
	if len(times) > 1:
		date_list = date_seq(times,"hours",6)
	else:
		date_list = times
	formatted_dates = [format_dates(x) for x in date_list]
	unique_dates = np.unique(formatted_dates)
	time_hours = np.empty(len(date_list))
	for t in np.arange(0,len(date_list)):
		time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)
	if (date_list[0].day==1) & (date_list[0].hour<3):
		fc_unique_dates = np.insert(unique_dates, 0, format_dates(date_list[0] - dt.timedelta(1)))
	else:
		fc_unique_dates = np.copy(unique_dates)

	#Get time-invariant pressure and spatial info
	no_p, p, p_ind = get_pressure(100)
	p = p[p_ind]
	lon,lat = get_lat_lon()
	lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
	lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
	lon = lon[lon_ind]
	lat = lat[lat_ind]
	terrain = reform_terrain(lon,lat)

	#Initialise arrays for each variable
	ta = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	dp = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	hur = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	hgt = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	ua = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	va = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	wap = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	uas = np.empty((len(date_list),len(lat_ind),len(lon_ind)))
	vas = np.empty((len(date_list),len(lat_ind),len(lon_ind)))
	ps = np.empty((len(date_list),len(lat_ind),len(lon_ind)))
	cp = np.zeros(ps.shape) * np.nan
	tp = np.zeros(ps.shape) * np.nan
	cape = np.zeros(ps.shape) * np.nan
	wg10 = np.zeros(ps.shape) * np.nan

	tas = np.empty((len(date_list),len(lat_ind),len(lon_ind)))
	ta2d = np.empty((len(date_list),len(lat_ind),len(lon_ind)))

	for date in unique_dates:

		#Load ERA-Interim reanalysis files
		ta_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/ta/\
ta_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		z_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/z/\
z_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		wap_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/wap/\
wap_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		ua_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/ua/\
ua_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		va_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/va/\
va_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		hur_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_pl/v01/hur/\
hur_6hrs_ERAI_historical_an-pl_"+date+"*.nc")[0])
		uas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/uas/\
uas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
		vas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/vas/\
vas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
		ta2d_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/ta2d/\
ta2d_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
		tas_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/tas/\
tas_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])
		ps_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/6hr/atmos/oper_an_sfc/v01/ps/\
ps_6hrs_ERAI_historical_an-sfc_"+date+"*.nc")[0])

		#Get times to load in from file
		times = ta_file["time"][:]
		time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
		date_ind = np.where(np.array(formatted_dates) == date)[0]

		#Load analysis data
		ta[date_ind,:,:,:] = ta_file["ta"][time_ind,p_ind,lat_ind,lon_ind] - 273.15
		wap[date_ind,:,:,:] = wap_file["wap"][time_ind,p_ind,lat_ind,lon_ind]
		ua[date_ind,:,:,:] = ua_file["ua"][time_ind,p_ind,lat_ind,lon_ind]
		va[date_ind,:,:,:] = va_file["va"][time_ind,p_ind,lat_ind,lon_ind]
		hgt[date_ind,:,:,:] = z_file["z"][time_ind,p_ind,lat_ind,lon_ind] / 9.8
		hur[date_ind,:,:,:] = hur_file["hur"][time_ind,p_ind,lat_ind,lon_ind]
		hur[hur<0] = 0
		hur[hur>100] = 100
		dp[date_ind,:,:,:] = get_dp(ta[date_ind,:,:,:],hur[date_ind,:,:,:])
		uas[date_ind,:,:] = uas_file["uas"][time_ind,lat_ind,lon_ind]
		vas[date_ind,:,:] = vas_file["vas"][time_ind,lat_ind,lon_ind]
		tas[date_ind,:,:] = tas_file["tas"][time_ind,lat_ind,lon_ind] - 273.15
		ta2d[date_ind,:,:] = ta2d_file["ta2d"][time_ind,lat_ind,lon_ind] - 273.15
		ps[date_ind,:,:] = ps_file["ps"][time_ind,lat_ind,lon_ind] / 100

		ta_file.close();z_file.close();ua_file.close();va_file.close();hur_file.close();uas_file.close();vas_file.close();tas_file.close();ta2d_file.close();ps_file.close();wap_file.close()
	
	for date in fc_unique_dates:
	
		if int(date) >= 197900:

			tp_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/tp/"\
	+"tp_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])
			cp_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/cp/"\
	+"cp_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])
			cape_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/cape/"\
	+"cape_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])
			wg10_file = nc.Dataset(glob.glob("/g/data/ub4/erai/netcdf/3hr/atmos/oper_fc_sfc/v01/wg10/"\
	+"wg10_3hrs_ERAI_historical_fc-sfc_"+date+"*.nc")[0])

			#Load forecast data
			fc_times = nc.num2date(cp_file["time"][:], cp_file["time"].units)
			#an_times = nc.num2date(ps_file["time"][time_ind], ps_file["time"].units)
			an_times = date_list
			fc_cp = cp_file.variables["cp"][:,lat_ind,lon_ind]
			fc_tp = tp_file.variables["tp"][:,lat_ind,lon_ind]
			fc_cape = cape_file.variables["cape"][:,lat_ind,lon_ind]
			fc_wg10 = wg10_file.variables["wg10"][:,lat_ind,lon_ind]
			cnt = 0
			for an_t in an_times:
				try:
					fc_ind = np.where(an_t == np.array(fc_times))[0][0]
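					#tp/cp are accumulations in metres from the forecast base time; take the
					# difference between consecutive 3-hourly steps and convert to mm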
					cp[cnt] = ((fc_cp[fc_ind] - fc_cp[fc_ind - 1]) * 1000.)
					tp[cnt] = ((fc_tp[fc_ind] - fc_tp[fc_ind - 1]) * 1000.)
					cape[cnt] = (fc_cape[fc_ind])
					wg10[cnt] = (fc_wg10[fc_ind])
				except IndexError:
					#This analysis time is not in the forecast file; leave as NaN
					pass
				cnt = cnt + 1

			cp_file.close(); cape_file.close(); wg10_file.close(); tp_file.close()

	return [ta,dp,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,cp,tp,wg10,cape,lon,lat,date_list]
def read_era5(domain,times,pres=True,delta_t=1):
	#Open ERA5 netcdf files and extract variables needed for a range of times 
	# and given spatial domain

	ref = dt.datetime(1900,1,1,0,0,0)
	if len(times) > 1:
		date_list = date_seq(times,"hours",delta_t)
	else:
		date_list = times
	formatted_dates = [format_dates(x) for x in date_list]
	unique_dates = np.unique(formatted_dates)
	time_hours = np.empty(len(date_list))
	for t in np.arange(0,len(date_list)):
		time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)
	if (date_list[0].day==1) & (date_list[0].hour<3):
		fc_unique_dates = np.insert(unique_dates, 0, format_dates(date_list[0] - dt.timedelta(1)))
	else:
		fc_unique_dates = np.copy(unique_dates)

	#Get time-invariant pressure and spatial info
	no_p, p, p_ind = get_pressure(100)
	p = p[p_ind]
	lon,lat = get_lat_lon()
	lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
	lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
	lon = lon[lon_ind]
	lat = lat[lat_ind]
	terrain = reform_terrain(lon,lat)
	sfc_lon,sfc_lat = get_lat_lon_sfc()
	sfc_lon_ind = np.where((sfc_lon >= domain[2]) & (sfc_lon <= domain[3]))[0]
	sfc_lat_ind = np.where((sfc_lat >= domain[0]) & (sfc_lat <= domain[1]))[0]
	sfc_lon = sfc_lon[sfc_lon_ind]
	sfc_lat = sfc_lat[sfc_lat_ind]

	#Initialise arrays for each variable
	if pres:
		ta = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		dp = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		hur = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		hgt = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		ua = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		va = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		wap = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	uas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	vas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	ps = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	cp = np.zeros(ps.shape) * np.nan
	cape = np.zeros(ps.shape) * np.nan
	wg10 = np.zeros(ps.shape) * np.nan

	tas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	ta2d = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))

	for date in unique_dates:

		#Load ERA5 reanalysis files
		if pres:
			ta_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/t/"+date[0:4]+\
				"/t_era5_aus_"+date+"*.nc")[0])
			z_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/z/"+date[0:4]+\
				"/z_era5_aus_"+date+"*.nc")[0])
			ua_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/u/"+date[0:4]+\
				"/u_era5_aus_"+date+"*.nc")[0])
			va_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/v/"+date[0:4]+\
				"/v_era5_aus_"+date+"*.nc")[0])
			hur_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/pressure/r/"+date[0:4]+\
				"/r_era5_aus_"+date+"*.nc")[0])

		uas_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/u10/"+date[0:4]+\
			"/u10_era5_global_"+date+"*.nc")[0])
		vas_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/v10/"+date[0:4]+\
			"/v10_era5_global_"+date+"*.nc")[0])
		ta2d_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/d2m/"+date[0:4]+\
			"/d2m_era5_global_"+date+"*.nc")[0])
		tas_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/t2m/"+date[0:4]+\
			"/t2m_era5_global_"+date+"*.nc")[0])
		ps_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/sp/"+date[0:4]+\
			"/sp_era5_global_"+date+"*.nc")[0])
		cape_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/cape/"+date[0:4]+\
			"/cape_era5_global_"+date+"*.nc")[0])
		cp_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/cp/"+date[0:4]+\
			"/cp_era5_global_"+date+"*.nc")[0])
		wg10_file = nc.Dataset(glob.glob("/g/data/ub4/era5/netcdf/surface/fg10/"+date[0:4]+\
			"/fg10_era5_global_"+date+"*.nc")[0])

		#Get times to load in from file
		if pres:
			times = ta_file["time"][:]
			time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
			date_ind = np.where(np.array(formatted_dates) == date)[0]
		else:
			times = uas_file["time"][:]
			time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
			date_ind = np.where(np.array(formatted_dates) == date)[0]

		#Get times to load in from forecast files (wg10 and cp)
		fc_times = cp_file["time"][:]
		fc_time_ind = [np.where(x==fc_times)[0][0] for x in time_hours if (x in fc_times)]

		#Load analysis data
		if pres:
			ta[date_ind,:,:,:] = ta_file["t"][time_ind,p_ind,lat_ind,lon_ind] - 273.15
			#wap[date_ind,:,:,:] = wap_file["wap"][time_ind,p_ind,lat_ind,lon_ind]
			ua[date_ind,:,:,:] = ua_file["u"][time_ind,p_ind,lat_ind,lon_ind]
			va[date_ind,:,:,:] = va_file["v"][time_ind,p_ind,lat_ind,lon_ind]
			hgt[date_ind,:,:,:] = z_file["z"][time_ind,p_ind,lat_ind,lon_ind] / 9.8
			hur[date_ind,:,:,:] = hur_file["r"][time_ind,p_ind,lat_ind,lon_ind]
			hur[hur<0] = 0
			hur[hur>100] = 100
			dp[date_ind,:,:,:] = get_dp(ta[date_ind,:,:,:],hur[date_ind,:,:,:])
		uas[date_ind,:,:] = uas_file["u10"][time_ind,sfc_lat_ind,sfc_lon_ind]
		vas[date_ind,:,:] = vas_file["v10"][time_ind,sfc_lat_ind,sfc_lon_ind]
		tas[date_ind,:,:] = tas_file["t2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
		ta2d[date_ind,:,:] = ta2d_file["d2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
		ps[date_ind,:,:] = ps_file["sp"][time_ind,sfc_lat_ind,sfc_lon_ind] / 100
		fc_date_ind = np.in1d(date_list, nc.num2date(cp_file["time"][fc_time_ind], cp_file["time"].units))
		cp[fc_date_ind,:,:] = cp_file["cp"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
		cape[fc_date_ind,:,:] = cape_file["cape"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
		wg10[fc_date_ind,:,:] = wg10_file["fg10"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]

		if pres:
			ta_file.close();z_file.close();ua_file.close();va_file.close();hur_file.close()
		uas_file.close();vas_file.close();tas_file.close();ta2d_file.close();ps_file.close()
		cape_file.close();cp_file.close();wg10_file.close()

	if pres:
		p = np.flip(p)
		ta = np.flip(ta, axis=1)
		dp = np.flip(dp, axis=1)
		hur = np.flip(hur, axis=1)
		hgt = np.flip(hgt, axis=1)
		ua = np.flip(ua, axis=1)
		va = np.flip(va, axis=1)
		return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,cp,wg10,cape,lon,lat,date_list]
	else:
		return [ps,uas,vas,tas,ta2d,cp,wg10,cape,sfc_lon,sfc_lat,date_list]
def read_era5_cds(pres_path, sfc_path, domain,times,delta_t=1):
	#Read data downloaded from the ERA5 CDS. Give this function the file paths for the pressure level
	# and surface level files

	ref = dt.datetime(1900,1,1,0,0,0)
	if len(times) > 1:
		date_list = date_seq(times,"hours",delta_t)
	else:
		date_list = times
	formatted_dates = [format_dates(x) for x in date_list]
	unique_dates = np.unique(formatted_dates)
	time_hours = np.empty(len(date_list))
	for t in np.arange(0,len(date_list)):
		time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)
	if (date_list[0].day==1) & (date_list[0].hour<3):
		fc_unique_dates = np.insert(unique_dates, 0, format_dates(date_list[0] - dt.timedelta(1)))
	else:
		fc_unique_dates = np.copy(unique_dates)

	#Get time-invariant pressure and spatial info
	p = xr.open_dataset(pres_path).level.values
	p_ind = p>=100
	p = p[p_ind]
	no_p = len(p)
	lon = xr.open_dataset(pres_path).longitude.values
	lat = xr.open_dataset(pres_path).latitude.values 
	lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
	lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
	lon = lon[lon_ind]
	lat = lat[lat_ind]
	sfc_lon = xr.open_dataset(sfc_path).longitude.values
	sfc_lat = xr.open_dataset(sfc_path).latitude.values 
	sfc_lon_ind = np.where((sfc_lon >= domain[2]) & (sfc_lon <= domain[3]))[0]
	sfc_lat_ind = np.where((sfc_lat >= domain[0]) & (sfc_lat <= domain[1]))[0]
	sfc_lon = sfc_lon[sfc_lon_ind]
	sfc_lat = sfc_lat[sfc_lat_ind]

	#Initialise arrays for each variable
	ta = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	dp = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	hur = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	hgt = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	ua = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	va = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	wap = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	uas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	vas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	ps = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	cp = np.zeros(ps.shape) * np.nan
	tp = np.zeros(ps.shape) * np.nan
	cape = np.zeros(ps.shape) * np.nan
	wg10 = np.zeros(ps.shape) * np.nan

	tas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	ta2d = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))

	for date in unique_dates:

		#Load the ERA5 pressure-level and single-level files downloaded from the CDS
		pres_file = nc.Dataset(pres_path)
		sfc_file = nc.Dataset(sfc_path)
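		#CDS cp/tp are hourly accumulations in metres; sum them to delta_t-hourly totals here
		# (converted to mm further below)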

		cp_file = xr.open_dataset(sfc_path).isel({"longitude":sfc_lon_ind, "latitude":sfc_lat_ind})\
			    .resample(indexer={"time":str(delta_t)+"H"},\
			    label="right",closed="right").sum("time")["cp"][1:,:,:]
		tp_file = xr.open_dataset(sfc_path).isel({"longitude":sfc_lon_ind, "latitude":sfc_lat_ind})\
			    .resample(indexer={"time":str(delta_t)+"H"},\
			    label="right",closed="right").sum("time")["tp"][1:,:,:]

		#Get times to load in from file
		times = pres_file["time"][:]
		time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
		date_ind = np.where(np.array(formatted_dates) == date)[0]

		#Get times to load in from forecast files (wg10)
		fc_times = sfc_file["time"][:]
		fc_time_ind = [np.where(x==fc_times)[0][0] for x in time_hours if (x in fc_times)]

		#Get times to load in from precip files (tp)
		tp_time_ind = np.in1d(tp_file.time, [np.datetime64(date_list[i]) for i in np.arange(len(date_list))])

		#Load analysis data
		ta[date_ind,:,:,:] = pres_file["t"][time_ind,p_ind,lat_ind,lon_ind] - 273.15
		ua[date_ind,:,:,:] = pres_file["u"][time_ind,p_ind,lat_ind,lon_ind]
		va[date_ind,:,:,:] = pres_file["v"][time_ind,p_ind,lat_ind,lon_ind]
		hgt[date_ind,:,:,:] = pres_file["z"][time_ind,p_ind,lat_ind,lon_ind] / 9.8
		hur[date_ind,:,:,:] = pres_file["r"][time_ind,p_ind,lat_ind,lon_ind]
		hur[hur<0] = 0
		hur[hur>100] = 100
		dp[date_ind,:,:,:] = get_dp(ta[date_ind,:,:,:],hur[date_ind,:,:,:])
		uas[date_ind,:,:] = sfc_file["u10"][time_ind,sfc_lat_ind,sfc_lon_ind]
		vas[date_ind,:,:] = sfc_file["v10"][time_ind,sfc_lat_ind,sfc_lon_ind]
		tas[date_ind,:,:] = sfc_file["t2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
		ta2d[date_ind,:,:] = sfc_file["d2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
		ps[date_ind,:,:] = sfc_file["sp"][time_ind,sfc_lat_ind,sfc_lon_ind] / 100
		fc_date_ind = np.in1d(date_list, nc.num2date(sfc_file["time"][fc_time_ind], sfc_file["time"].units))
		tp_date_ind = np.in1d([np.datetime64(date_list[i]) for i in np.arange(len(date_list))],tp_file.time.values)
		cp[tp_date_ind,:,:] = cp_file.isel({"time":tp_time_ind}).values * 1000
		tp[tp_date_ind,:,:] = tp_file.isel({"time":tp_time_ind}).values * 1000
		cape[fc_date_ind,:,:] = sfc_file["cape"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
		wg10[fc_date_ind,:,:] = sfc_file["fg10"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]

		terrain = sfc_file["z"][0,sfc_lat_ind,sfc_lon_ind] / 9.8
		
		tp_file.close(); cp_file.close(); sfc_file.close(); pres_file.close()

	p = np.flip(p)
	ta = np.flip(ta, axis=1)
	dp = np.flip(dp, axis=1)
	hur = np.flip(hur, axis=1)
	hgt = np.flip(hgt, axis=1)
	ua = np.flip(ua, axis=1)
	va = np.flip(va, axis=1)
	return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,cp,tp,wg10,cape,lon,lat,date_list]
def read_era5_rt52(domain,times,pres=True,delta_t=1):
	#Open ERA5 netcdf files and extract variables needed for a range of times 
	# and given spatial domain

	ref = dt.datetime(1900,1,1,0,0,0)
	if len(times) > 1:
		date_list = date_seq(times,"hours",delta_t)
	else:
		date_list = times
	formatted_dates = [format_dates(x) for x in date_list]
	unique_dates = np.unique(formatted_dates)
	time_hours = np.empty(len(date_list))
	for t in np.arange(0,len(date_list)):
		time_hours[t] = (date_list[t] - ref).total_seconds() / (3600)
	if (date_list[0].day==1) & (date_list[0].hour<3):
		fc_unique_dates = np.insert(unique_dates, 0, format_dates(date_list[0] - dt.timedelta(1)))
	else:
		fc_unique_dates = np.copy(unique_dates)

	#Get time-invariant pressure and spatial info
	no_p, p, p_ind = get_pressure(100)
	p = p[p_ind]
	lon,lat = get_lat_lon_rt52()
	lon_ind = np.where((lon >= domain[2]) & (lon <= domain[3]))[0]
	lat_ind = np.where((lat >= domain[0]) & (lat <= domain[1]))[0]
	lon = lon[lon_ind]
	lat = lat[lat_ind]
	terrain = reform_terrain(lon,lat)
	sfc_lon,sfc_lat = get_lat_lon_sfc()
	sfc_lon_ind = np.where((sfc_lon >= domain[2]) & (sfc_lon <= domain[3]))[0]
	sfc_lat_ind = np.where((sfc_lat >= domain[0]) & (sfc_lat <= domain[1]))[0]
	sfc_lon = sfc_lon[sfc_lon_ind]
	sfc_lat = sfc_lat[sfc_lat_ind]

	#Initialise arrays for each variable
	if pres:
		ta = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		dp = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		hur = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		hgt = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		ua = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		va = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
		wap = np.empty((len(date_list),no_p,len(lat_ind),len(lon_ind)))
	uas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	vas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	sst = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))    
	ps = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	cp = np.zeros(ps.shape) * np.nan
	tp = np.zeros(ps.shape) * np.nan
	cape = np.zeros(ps.shape) * np.nan
	wg10 = np.zeros(ps.shape) * np.nan

	tas = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))
	ta2d = np.empty((len(date_list),len(sfc_lat_ind),len(sfc_lon_ind)))

	for date in unique_dates:

		#Load ERA5 reanalysis files
		if pres:
			ta_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/t/"+date[0:4]+\
				"/t_era5_oper_pl_"+date+"*.nc")[0])
			z_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/z/"+date[0:4]+\
				"/z_era5_oper_pl_"+date+"*.nc")[0])
			ua_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/u/"+date[0:4]+\
				"/u_era5_oper_pl_"+date+"*.nc")[0])
			va_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/v/"+date[0:4]+\
				"/v_era5_oper_pl_"+date+"*.nc")[0])
			hur_file = nc.Dataset(glob.glob("/g/data/rt52/era5/pressure-levels/reanalysis/r/"+date[0:4]+\
				"/r_era5_oper_pl_"+date+"*.nc")[0])

		uas_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/10u/"+date[0:4]+\
			"/10u_era5_oper_sfc_"+date+"*.nc")[0])
		vas_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/10v/"+date[0:4]+\
			"/10v_era5_oper_sfc_"+date+"*.nc")[0])
		sst_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/sst/"+date[0:4]+\
			"/sst_era5_oper_sfc_"+date+"*.nc")[0])        
		ta2d_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/2d/"+date[0:4]+\
			"/2d_era5_oper_sfc_"+date+"*.nc")[0])
		tas_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/2t/"+date[0:4]+\
			"/2t_era5_oper_sfc_"+date+"*.nc")[0])
		ps_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/sp/"+date[0:4]+\
			"/sp_era5_oper_sfc_"+date+"*.nc")[0])
		cape_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/cape/"+date[0:4]+\
			"/cape_era5_oper_sfc_"+date+"*.nc")[0])
		cp_file = (xr.open_dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/mcpr/"+date[0:4]+\
			"/mcpr_era5_oper_sfc_"+date+"*.nc")[0]).isel({"longitude":sfc_lon_ind, "latitude":sfc_lat_ind}) * 3600)\
			    .resample(indexer={"time":str(delta_t)+"H"},\
			    label="right",closed="right").sum("time")["mcpr"][1:,:,:]
		tp_file = (xr.open_dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/mtpr/"+date[0:4]+\
			"/mtpr_era5_oper_sfc_"+date+"*.nc")[0]).isel({"longitude":sfc_lon_ind, "latitude":sfc_lat_ind}) * 3600)\
			    .resample(indexer={"time":str(delta_t)+"H"},\
			    label="right",closed="right").sum("time")["mtpr"][1:,:,:]
		wg10_file = nc.Dataset(glob.glob("/g/data/rt52/era5/single-levels/reanalysis/10fg/"+date[0:4]+\
			"/10fg_era5_oper_sfc_"+date+"*.nc")[0])

		#Get times to load in from file
		if pres:
			times = ta_file["time"][:]
			time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
			date_ind = np.where(np.array(formatted_dates) == date)[0]
		else:
			times = uas_file["time"][:]
			time_ind = [np.where(x==times)[0][0] for x in time_hours if (x in times)]
			date_ind = np.where(np.array(formatted_dates) == date)[0]

		#Get times to load in from forecast files (wg10)
		fc_times = wg10_file["time"][:]
		fc_time_ind = [np.where(x==fc_times)[0][0] for x in time_hours if (x in fc_times)]

		#Get times to load in from precip files (tp)
		tp_time_ind = np.in1d(tp_file.time, [np.datetime64(date_list[i]) for i in np.arange(len(date_list))])

		#Load analysis data
		if pres:
			ta[date_ind,:,:,:] = ta_file["t"][time_ind,p_ind,lat_ind,lon_ind] - 273.15
			#wap[date_ind,:,:,:] = wap_file["wap"][time_ind,p_ind,lat_ind,lon_ind]
			ua[date_ind,:,:,:] = ua_file["u"][time_ind,p_ind,lat_ind,lon_ind]
			va[date_ind,:,:,:] = va_file["v"][time_ind,p_ind,lat_ind,lon_ind]
			hgt[date_ind,:,:,:] = z_file["z"][time_ind,p_ind,lat_ind,lon_ind] / 9.8
			hur[date_ind,:,:,:] = hur_file["r"][time_ind,p_ind,lat_ind,lon_ind]
			hur[hur<0] = 0
			hur[hur>100] = 100
			dp[date_ind,:,:,:] = get_dp(ta[date_ind,:,:,:],hur[date_ind,:,:,:])
		uas[date_ind,:,:] = uas_file["u10"][time_ind,sfc_lat_ind,sfc_lon_ind]
		vas[date_ind,:,:] = vas_file["v10"][time_ind,sfc_lat_ind,sfc_lon_ind]
		sst[date_ind,:,:] = sst_file["sst"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15        
		tas[date_ind,:,:] = tas_file["t2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
		ta2d[date_ind,:,:] = ta2d_file["d2m"][time_ind,sfc_lat_ind,sfc_lon_ind] - 273.15
		ps[date_ind,:,:] = ps_file["sp"][time_ind,sfc_lat_ind,sfc_lon_ind] / 100
		fc_date_ind = np.in1d(date_list, nc.num2date(wg10_file["time"][fc_time_ind], wg10_file["time"].units))
		tp_date_ind = np.in1d([np.datetime64(date_list[i]) for i in np.arange(len(date_list))],tp_file.time.values)
		cp[tp_date_ind,:,:] = cp_file.isel({"time":tp_time_ind}).values
		tp[tp_date_ind,:,:] = tp_file.isel({"time":tp_time_ind}).values
		cape[fc_date_ind,:,:] = cape_file["cape"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]
		wg10[fc_date_ind,:,:] = wg10_file["fg10"][fc_time_ind,sfc_lat_ind,sfc_lon_ind]

		if pres:
			ta_file.close();z_file.close();ua_file.close();va_file.close();hur_file.close()
		uas_file.close();vas_file.close();tas_file.close();ta2d_file.close();ps_file.close()
		sst_file.close();cape_file.close();wg10_file.close()

	if pres:
		p = np.flip(p)
		ta = np.flip(ta, axis=1)
		dp = np.flip(dp, axis=1)
		hur = np.flip(hur, axis=1)
		hgt = np.flip(hgt, axis=1)
		ua = np.flip(ua, axis=1)
		va = np.flip(va, axis=1)
		return [ta,dp,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,cp,tp,wg10,cape,sst,lon,lat,date_list]
	else:
		return [ps,uas,vas,tas,ta2d,cp,tp,wg10,cape,sfc_lon,sfc_lat,date_list]
def to_points_wind_gust(loc_id, points, fname, start_year, end_year):

    #Load daily maximum wind gust data from the BARPA trials on du7, and extract point values
    #As in to_points_loop(), but just for the daily maximum 10 m wind gust (max_wndgust10m)
    from dask.diagnostics import ProgressBar
    import gc
    ProgressBar().register()

    dates = []
    for y in np.arange(start_year, end_year + 1):
        for m in np.arange(1, 13):
            dates.append(dt.datetime(y, m, 1, 12, 0, 0))
    last_date = dt.datetime(y + 1, 1, 1, 12, 0, 0)

    df = pd.DataFrame()

    lsm = xr.open_dataset(
        "/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/static/lnd_mask-BARPA-EASTAUS_12km.nc"
    )

    #Read netcdf data
    for t in np.arange(len(dates)):
        print(dates[t])
        try:
            query_dates = date_seq(
                [dates[t], dates[t + 1] + dt.timedelta(days=-1)], "hours", 24)
        except IndexError:
            query_dates = date_seq(
                [dates[t], last_date + dt.timedelta(days=-1)], "hours", 24)
        wg_files = np.sort(
            glob.glob(
                "/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/era/erai/r0/*/*/pp0/max_wndgust10m*.nc"
            ))
        wg_files = wg_files[file_dates(wg_files, query_dates)]
        f = drop_duplicates(
            xr.open_mfdataset(wg_files, concat_dim="time",
                              combine="nested")).sel({"time": query_dates})

        #Setup lsm
        lat = f.coords.get("latitude").values
        lon = f.coords.get("longitude").values
        x, y = np.meshgrid(lon, lat)
        x[lsm.lnd_mask == 0] = np.nan
        y[lsm.lnd_mask == 0] = np.nan

        dist_lon = []
        dist_lat = []
        for i in np.arange(len(loc_id)):

            dist = np.sqrt(np.square(x-points[i][0]) + \
                    np.square(y-points[i][1]))
            temp_lat, temp_lon = np.unravel_index(np.nanargmin(dist),
                                                  dist.shape)
            dist_lon.append(temp_lon)
            dist_lat.append(temp_lat)

        temp_df = f["max_wndgust10m"].isel(latitude = xr.DataArray(dist_lat, dims="points"), \
                        longitude = xr.DataArray(dist_lon, dims="points")).persist().to_dataframe()
        temp_df = temp_df.reset_index()
        temp_df["time"] = pd.DatetimeIndex(
            temp_df.time) + dt.timedelta(hours=-12)

        for p in np.arange(len(loc_id)):
            temp_df.loc[temp_df.points == p, "loc_id"] = loc_id[p]

        temp_df = temp_df.drop(["points",\
                "forecast_period", "forecast_reference_time", "height"],axis=1)

        df = pd.concat([df, temp_df])
        f.close()
        gc.collect()

    df.sort_values([
        "loc_id", "time"
    ]).to_pickle("/g/data/eg3/ab4502/ExtremeWind/points/" + fname + ".pkl")
def read_barpa(domain, time, experiment, forcing_mdl, ensemble):

    #NOTE: Data has been set to zero (K) for pressure levels below the surface,
    # but wrf_parallel doesn't use these levels anyway.
    #TODO: The above statement may not be true: -273.15 K values may cause problems for some routines,
    # even if below ground level. These values are masked to NaN below.

    #Create a list of 6-hourly "query" date-times, based on the start and end dates provided.
    query_dates = date_seq(time, "hours", 6)

    #Get a list of all BARPA files in the du7 directory, for a given experiment/forcing model
    geopt_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp2/geop_ht_uv*"))
    hus_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp2/spec_hum*"))
    ta_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp2/air_temp*"))
    ua_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp2/wnd_ucmp*"))
    va_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp2/wnd_vcmp*"))
    huss_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp3/qsair_scrn*"))
    dewpt_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp26/dewpt_scrn*"))
    tas_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp3/temp_scrn*"))
    uas_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp3/uwnd10m_b*"))
    vas_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp3/vwnd10m_b*"))
    ps_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp26/sfc_pres*"))
    wg_files = np.sort(glob.glob("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/"+\
         experiment+"/"+forcing_mdl+\
         "/"+ensemble+"/*/*/pp26/wndgust10m*"))

    #Get the files that we need
    geopt_files = geopt_files[file_dates(geopt_files, query_dates)]
    hus_files = hus_files[file_dates(hus_files, query_dates)]
    ta_files = ta_files[file_dates(ta_files, query_dates)]
    ua_files = ua_files[file_dates(ua_files, query_dates)]
    va_files = va_files[file_dates(va_files, query_dates)]
    huss_files = huss_files[file_dates(huss_files, query_dates)]
    dewpt_files = dewpt_files[file_dates(dewpt_files, query_dates)]
    tas_files = tas_files[file_dates(tas_files, query_dates)]
    uas_files = uas_files[file_dates(uas_files, query_dates)]
    vas_files = vas_files[file_dates(vas_files, query_dates)]
    ps_files = ps_files[file_dates(ps_files, query_dates)]
    wg_files = wg_files[file_dates(wg_files, query_dates)]

    #Load in these files, dropping duplicates
    #Drop the variable "realization", as it appears in some streams but not others, and is not used
    geopt_ds = drop_duplicates(
        xr.open_mfdataset(geopt_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #m
    hus_ds = drop_duplicates(
        xr.open_mfdataset(hus_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #1 (kg/kg?)
    ta_ds = drop_duplicates(
        xr.open_mfdataset(ta_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #K
    ua_ds = drop_duplicates(
        xr.open_mfdataset(ua_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #m/s
    va_ds = drop_duplicates(
        xr.open_mfdataset(va_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #m/s
    huss_ds = drop_duplicates(
        xr.open_mfdataset(huss_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #kg/kg
    dewpt_ds = drop_duplicates(
        xr.open_mfdataset(dewpt_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #K
    tas_ds = drop_duplicates(
        xr.open_mfdataset(tas_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #K
    uas_ds = drop_duplicates(
        xr.open_mfdataset(uas_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #m/s
    vas_ds = drop_duplicates(
        xr.open_mfdataset(vas_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #m/s
    ps_ds = drop_duplicates(
        xr.open_mfdataset(ps_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #Pa
    wg_ds = drop_duplicates(
        xr.open_mfdataset(wg_files,
                          concat_dim="time",
                          combine="nested",
                          drop_variables=["realization"]))  #m/s

    #Slice to the query times and spatial domain, convert to dataarrays, and restrict to levels with pressure >= 100 hPa
    lons = slice(domain[2], domain[3])
    lats = slice(domain[0], domain[1])
    geopt_da = geopt_ds.sel({
        "time": query_dates,
        "pressure": geopt_ds["pressure"] >= 100,
        "latitude": lats,
        "longitude": lons
    })["geop_ht_uv"]
    hus_da = hus_ds.sel({
        "time": query_dates,
        "pressure": geopt_ds["pressure"] >= 100,
        "latitude": lats,
        "longitude": lons
    })["spec_hum_uv"]
    ta_da = ta_ds.sel({
        "time": query_dates,
        "pressure": geopt_ds["pressure"] >= 100,
        "latitude": lats,
        "longitude": lons
    })["air_temp_uv"]
    ua_da = ua_ds.sel({
        "time": query_dates,
        "pressure": geopt_ds["pressure"] >= 100,
        "latitude": lats,
        "longitude": lons
    })["wnd_ucmp_uv"]
    va_da = va_ds.sel({
        "time": query_dates,
        "pressure": geopt_ds["pressure"] >= 100,
        "latitude": lats,
        "longitude": lons
    })["wnd_vcmp_uv"]
    huss_da = huss_ds.sel({
        "time": query_dates,
        "latitude": lats,
        "longitude": lons
    })["qsair_scrn"]
    dewpt_da = dewpt_ds.sel({
        "time": query_dates,
        "latitude": lats,
        "longitude": lons
    })["dewpt_scrn"]
    tas_da = tas_ds.sel({
        "time": query_dates,
        "latitude": lats,
        "longitude": lons
    })["temp_scrn"]
    uas_da = uas_ds.sel({
        "time": query_dates,
        "latitude": lats,
        "longitude": lons
    })["uwnd10m_b"]
    vas_da = vas_ds.sel({
        "time": query_dates,
        "latitude": lats,
        "longitude": lons
    })["vwnd10m_b"]
    ps_da = ps_ds.sel({
        "time": query_dates,
        "latitude": lats,
        "longitude": lons
    })["sfc_pres"]
    wg_da = wg_ds.sel({
        "time": query_dates,
        "latitude": lats,
        "longitude": lons
    })["wndgust10m"]

    #As in read_cmip, make sure that all data arrays have the same times (take the intersection of the sets of times).
    #If one of the dataarrays goes to size=0 on the time dimension, throw an error
    common_dates = np.array(list(set(hus_da.time.values) & set(ta_da.time.values) & set(ua_da.time.values)\
                    & set(va_da.time.values) & set(huss_da.time.values) & set(tas_da.time.values)\
                    & set(uas_da.time.values) & set(vas_da.time.values) & set(ps_da.time.values)\
      & set(geopt_da.time.values) & set(wg_da.time.values) & set(dewpt_da.time.values)))
    geopt_da = geopt_da.isel({"time": np.in1d(geopt_da.time, common_dates)})
    hus_da = hus_da.isel({"time": np.in1d(hus_da.time, common_dates)})
    ta_da = ta_da.isel({"time": np.in1d(ta_da.time, common_dates)})
    ua_da = ua_da.isel({"time": np.in1d(ua_da.time, common_dates)})
    va_da = va_da.isel({"time": np.in1d(va_da.time, common_dates)})
    huss_da = huss_da.isel({"time": np.in1d(huss_da.time, common_dates)})
    dewpt_da = dewpt_da.isel({"time": np.in1d(dewpt_da.time, common_dates)})
    tas_da = tas_da.isel({"time": np.in1d(tas_da.time, common_dates)})
    uas_da = uas_da.isel({"time": np.in1d(uas_da.time, common_dates)})
    vas_da = vas_da.isel({"time": np.in1d(vas_da.time, common_dates)})
    ps_da = ps_da.isel({"time": np.in1d(ps_da.time, common_dates)})
    wg_da = wg_da.isel({"time": np.in1d(wg_da.time, common_dates)})
    for da in [
            geopt_da, hus_da, ta_da, ua_da, va_da, huss_da, dewpt_da, tas_da,
            uas_da, vas_da, ps_da, wg_da
    ]:
        if len(da.time.values) == 0:
            varname = da.attrs["standard_name"]
            raise ValueError("ERROR: " + varname +
                             " HAS BEEN SLICED IN TIME DIMENSION TO SIZE=0")

    #Optionally, linearly interpolate pressure level data to match the BARRA pressure levels (currently disabled)
    kwargs = {"fill_value": None, "bounds_error": False}
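    #With bounds_error=False and fill_value=None, scipy's grid interpolation will extrapolate outside
    # the source grid (used when regridding the surface fields onto the pressure-level grid below)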
    #barra_levs = [100.0000000001, 150.0000000001, 175.0000000001,
    #    200.0000000001, 225.0000000001, 250.0000000001, 275.0000000001,
    #    300.0000000001, 350.0000000001, 400.0000000001, 450.0000000001,
    #    500.0000000001, 600.0000000001, 700.0000000001, 750.0000000001,
    #    800.0000000001, 850.0000000001, 900.0000000001, 925.0000000001,
    #    950.0000000001, 975.0000000001, 1000.0000000001]
    #geopt_da = geopt_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)
    #hus_da = hus_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)
    #ta_da = ta_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)
    #ua_da = ua_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)
    #va_da = va_da.interp(coords={"pressure":barra_levs}, method="linear", kwargs=kwargs)

    #Linearly interpolate variables onto the same lat/lon grid (pressure level U/V grid). Extrapolate to staggered values outside the grid
    huss_da = huss_da.interp(coords={
        "latitude": hus_da.latitude,
        "longitude": hus_da.longitude
    },
                             method="linear",
                             kwargs=kwargs)
    dewpt_da = dewpt_da.interp(coords={
        "latitude": hus_da.latitude,
        "longitude": hus_da.longitude
    },
                               method="linear",
                               kwargs=kwargs)
    tas_da = tas_da.interp(coords={
        "latitude": hus_da.latitude,
        "longitude": hus_da.longitude
    },
                           method="linear",
                           kwargs=kwargs)
    uas_da = uas_da.interp(coords={
        "latitude": hus_da.latitude,
        "longitude": hus_da.longitude
    },
                           method="linear",
                           kwargs=kwargs)
    vas_da = vas_da.interp(coords={
        "latitude": hus_da.latitude,
        "longitude": hus_da.longitude
    },
                           method="linear",
                           kwargs=kwargs)
    ps_da = ps_da.interp(coords={
        "latitude": hus_da.latitude,
        "longitude": hus_da.longitude
    },
                         method="linear",
                         kwargs=kwargs)
    wg_da = wg_da.interp(coords={
        "latitude": hus_da.latitude,
        "longitude": hus_da.longitude
    },
                         method="linear",
                         kwargs=kwargs)

    #Get numpy arrays of everything, and convert temperatures to degC and sfc pressure to hPa
    geopt = geopt_da.values
    hus = hus_da.values
    ta = ta_da.values - 273.15
    ua = ua_da.values
    va = va_da.values
    huss = huss_da.values
    dewpt = dewpt_da.values - 273.15
    tas = tas_da.values - 273.15
    uas = uas_da.values
    vas = vas_da.values
    ps = ps_da.values / 100.
    wg = wg_da.values

    #Mask -273.15 K values (these should only be values below surface)
    mask = (ta == (-273.15))
    geopt[mask] = np.nan
    hus[mask] = np.nan
    ta[mask] = np.nan
    ua[mask] = np.nan
    va[mask] = np.nan

    #Create 3d pressure variable
    p = np.moveaxis(
        np.tile(hus_da.pressure.values, [ta.shape[2], ta.shape[3], 1]), 2, 0)

    #Get hur from hus, ta and p3d
    hur = np.array(mpcalc.relative_humidity_from_specific_humidity(hus, \
                       ta*units.degC, p*units.hectopascal) * 100)
    hur[hur < 0] = 0
    hur[hur > 100] = 100

    #Load terrain data
    terrain = xr.open_dataset("/g/data/du7/barpa/trials/BARPA-EASTAUS_12km/static/topog-BARPA-EASTAUS_12km.nc").\
     sel({"latitude":lats, "longitude":lons})["topog"].values

    #Get lat/lon
    lat = hus_da.latitude.values
    lon = hus_da.longitude.values

    #Flip the pressure dimension
    ta = np.flip(ta, axis=1)
    hur = np.flip(hur, axis=1)
    geopt = np.flip(geopt, axis=1)
    p = np.flip(p, axis=0)
    ua = np.flip(ua, axis=1)
    va = np.flip(va, axis=1)

    #Return times from one of the data arrays (they are identical in time). If they differ from the query dates, throw a warning
    query_times = pd.to_datetime(query_dates)
    times = pd.to_datetime(huss_da.time.values)
    if not all(np.in1d(query_times, times)):
        message = "\n ".join(
            query_times[~np.in1d(query_times, times)].strftime("%Y%m%d %H:%M"))
        warnings.warn("WARNING: The following query dates were not loaded..." +
                      message)

    #Format times for output (datetime objects)
    out_times = [
        dt.datetime.strptime(
            huss_da.time.dt.strftime("%Y-%m-%d %H:%M").values[i],
            "%Y-%m-%d %H:%M") for i in np.arange(huss_da.time.shape[0])
    ]

    return [ta, hur, geopt, terrain, p[:,0,0], ps, ua, va, uas, vas, tas, dewpt, wg, lon,\
                       lat, out_times]
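
#A minimal usage sketch (illustrative values only). It assumes access to the NCI /g/data archives used
# above and the helper functions (date_seq, format_dates, get_lat_lon, reform_terrain) that are
# defined elsewhere in this module.
if __name__ == "__main__":

    #domain is [lat_min, lat_max, lon_min, lon_max] and times is a [start, end] pair of datetimes
    example_domain = [-45, -10, 110, 160]
    example_times = [dt.datetime(2012, 1, 1, 0), dt.datetime(2012, 1, 2, 18)]

    #Read ERA-Interim forecast 10 m wind gust and CAPE over the domain (6-hourly)
    wg10, cape, lon, lat, date_list = read_erai_fc(example_domain, example_times)
    print(wg10.shape, cape.shape, len(date_list))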