def plot_with_lines(year=None, target=None):
    ''' Draw a time-height section of one wind profiler variable (left)
        and every individual profile overlaid as faint red lines (right).

        year   -- passed unchanged to parse_data.windprof
        target -- column of the profiler dataframe to plot; also used
                  as the x label of the right-hand panel

        The colour limits (0-360) and the left title are fixed.
        Nothing is returned; the figure is shown without blocking.
    '''

    plt.figure(figsize=(10, 5))
    grid = gsp.GridSpec(1, 2, width_ratios=[2, 1])
    section_ax = plt.subplot(grid[0])
    lines_ax = plt.subplot(grid[1])

    profiler = parse_data.windprof(year)
    field = np.squeeze(pandas2stack(profiler.dframe[target]))

    # left panel: masked time-height section
    section_ax.pcolormesh(profiler.time, profiler.hgt,
                          ma.masked_where(np.isnan(field), field),
                          vmin=0, vmax=360)
    time_axis = section_ax.xaxis
    time_axis.set_major_locator(mdates.MonthLocator())
    time_axis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    section_ax.set_xlabel(r'$ Time \rightarrow$')
    section_ax.set_ylabel('height gate')
    section_ax.set_title('BBY Windprof wdir')

    # right panel: one line per profile against the gate index
    gates = range(field.shape[0])
    for profile in field.T:
        lines_ax.plot(profile, gates, color='r', alpha=0.05)
    lines_ax.set_yticklabels('')
    lines_ax.set_xlabel(target)

    plt.tight_layout()
    plt.show(block=False)
def plot_with_lines(year=None, target=None):
    ''' Draw a time-height section of one wind profiler variable (left)
        and every individual profile overlaid as faint red lines (right).

        year   -- passed unchanged to parse_data.windprof
        target -- column of the profiler dataframe to plot; also used
                  as the x label of the right-hand panel

        The colour limits (0-360) and the left title are fixed.
        Nothing is returned; the figure is shown without blocking.
    '''

    plt.figure(figsize=(10, 5))
    grid = gsp.GridSpec(1, 2, width_ratios=[2, 1])
    section_ax = plt.subplot(grid[0])
    lines_ax = plt.subplot(grid[1])

    profiler = parse_data.windprof(year)
    field = np.squeeze(pandas2stack(profiler.dframe[target]))

    # left panel: masked time-height section
    section_ax.pcolormesh(profiler.time, profiler.hgt,
                          ma.masked_where(np.isnan(field), field),
                          vmin=0, vmax=360)
    time_axis = section_ax.xaxis
    time_axis.set_major_locator(mdates.MonthLocator())
    time_axis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    section_ax.set_xlabel(r'$ Time \rightarrow$')
    section_ax.set_ylabel('height gate')
    section_ax.set_title('BBY Windprof wdir')

    # right panel: one line per profile against the gate index
    gates = range(field.shape[0])
    for profile in field.T:
        lines_ax.plot(profile, gates, color='r', alpha=0.05)
    lines_ax.set_yticklabels('')
    lines_ax.set_xlabel(target)

    plt.tight_layout()
    plt.show(block=False)
import parse_data

# Run the height check on every profiler year handled by this script.
# list(...) keeps the concatenation valid on Python 3, where range()
# is no longer a list.
for y in [1998] + list(range(2001, 2013)):
    wprof = parse_data.windprof(y)
    wprof.check_hgt(y)
def process(year=[], wdsurf=None, wdwpro=None, rainbb=None, raincz=None,
            nhours=None):
    ''' Build per-gate histograms, CFADs, means and medians of wind
        direction and wind speed for all / TTA / no-TTA hours.

        year   -- iterable of years (the default list is never mutated)
        wdsurf, wdwpro, rainbb, raincz, nhours -- forwarded to
                  tta_analysis.start_df as wdir_surf, wdir_wprof,
                  rain_bby, rain_czd and nhours

        Returns a list:
            [hist_spd, hist_dir, cfad_spd, cfad_dir,
             bins_spd, bins_dir, wprof hgt (last year read),
             wp_hours, tta_hours, notta_hours,
             average_spd, average_dir, median_spd, median_dir]
        The hist/cfad arrays are (gates, bins, 3) and the average/median
        arrays (gates, 3); the last axis is all / tta / notta.

        Raises ValueError when `year` is empty.
    '''

    if len(year) == 0:
        raise ValueError('process() needs at least one year')

    binss = {'wdir': np.arange(0, 370, 10),
             'wspd': np.arange(0, 36, 1)}

    def _cfad(groups, edges):
        ''' histogram, CFAD (Yuter et al 1995), mean and median per
            gate for each group in (all, tta, notta) '''
        n_gates = groups[0].shape[0]
        n_bins = len(edges) - 1
        hist = np.zeros((n_gates, n_bins, 3))
        cfad = np.full((n_gates, n_bins, 3), np.nan)
        average = np.full((n_gates, 3), np.nan)
        median = np.full((n_gates, 3), np.nan)
        for n, group in enumerate(groups):
            for hgt in range(n_gates):
                row = group[hgt, :]
                valid = row[~np.isnan(row)]
                freq, bins = np.histogram(valid, bins=edges)
                hist[hgt, :, n] = freq
                total = freq.sum()
                # gates without any binned value stay NaN instead of
                # dividing by zero
                if total:
                    bin_middle = (bins[1:] + bins[:-1]) / 2.
                    cfad[hgt, :, n] = 100. * (freq / float(total))
                    average[hgt, n] = np.sum(freq * bin_middle) / total
                if valid.size:
                    median[hgt, n] = np.percentile(valid, 50)
        return hist, cfad, average, median

    arrays = {}
    for t in ('wdir', 'wspd'):
        stacks = ([], [], [])  # all, tta, notta
        for y in year:
            print('Processing year {}'.format(y))

            # tta analysis
            tta = tta_analysis(y)
            tta.start_df(wdir_surf=wdsurf,
                         wdir_wprof=wdwpro,
                         rain_bby=rainbb,
                         rain_czd=raincz,
                         nhours=nhours)

            # read wprof and split it with the dates from the analysis
            wprof_df = parse_data.windprof(y)
            wprof = wprof_df.dframe[t].loc[tta.include_dates]
            parts = (wprof,
                     wprof.loc[tta.tta_dates],
                     wprof.loc[tta.notta_dates])
            for stack, part in zip(stacks, parts):
                block = np.squeeze(pandas2stack(part))
                if block.ndim == 1:
                    # a single hour squeezes to 1-D; keep it a column
                    block = np.expand_dims(block, axis=1)
                stack.append(block)

        wp, wp_tta, wp_notta = [np.hstack(s) for s in stacks]
        wp_hours = wp.shape[1]
        tta_hours = wp_tta.shape[1]
        notta_hours = wp_notta.shape[1]
        arrays[t] = [wp, wp_tta, wp_notta]

    # makes CFAD
    results = dict((k, _cfad(v, binss[k])) for k, v in arrays.items())
    hist_spd, cfad_spd, average_spd, median_spd = results['wspd']
    hist_dir, cfad_dir, average_dir, median_dir = results['wdir']

    return [hist_spd,
            hist_dir,
            cfad_spd,
            cfad_dir,
            binss['wspd'],
            binss['wdir'],
            wprof_df.hgt,
            wp_hours,
            tta_hours,
            notta_hours,
            average_spd,
            average_dir,
            median_spd,
            median_dir]
def processv2(year=[], wdsurf=None, wdwpro=None, rainbb=None, raincz=None,
              nhours=None):
    ''' v2: target loop moved into year loop

        Build per-gate histograms, CFADs, means and medians of wind
        speed, wind direction and the u, v components for
        all / TTA / no-TTA hours.

        year   -- iterable of years (the default list is never mutated)
        wdsurf, wdwpro, rainbb, raincz, nhours -- forwarded to
                  tta_analysis.start_df as wdir_surf, wdir_wprof,
                  rain_bby, rain_czd and nhours

        Returns a list:
            [hist_spd, hist_dir, hist_u, hist_v,
             cfad_spd, cfad_dir, cfad_u, cfad_v,
             bins_spd, bins_dir, bins_u, bins_v,
             wprof hgt (last year read),
             wp_hours, tta_hours, notta_hours,
             average_spd, average_dir, average_u, average_v,
             median_spd, median_dir, median_u, median_v]

        The yearly blocks are collected in lists and joined once, so no
        uninitialised seed column takes part in the statistics and the
        hour counts need no correction.

        Raises ValueError when `year` is empty or when no profile is
        left after the screening.
    '''

    if len(year) == 0:
        raise ValueError('processv2() needs at least one year')

    binss = {'wdir': np.arange(0, 370, 10),
             'wspd': np.arange(0, 36, 1),
             'u': np.arange(-15, 21, 1),
             'v': np.arange(-14, 21, 1),
             }

    def _cfad(groups, edges):
        ''' histogram, CFAD (Yuter et al 1995), mean and median per
            gate for each group in (all, tta, notta) '''
        n_rows = groups[0].shape[0]
        n_bins = len(edges) - 1
        hist = np.zeros((n_rows, n_bins, 3))
        cfad = np.full((n_rows, n_bins, 3), np.nan)
        average = np.full((n_rows, 3), np.nan)
        median = np.full((n_rows, 3), np.nan)
        for n, group in enumerate(groups):
            for hgt in range(n_rows):
                row = group[hgt, :]
                valid = row[~np.isnan(row)]
                freq, bins = np.histogram(valid, bins=edges)
                hist[hgt, :, n] = freq
                total = freq.sum()
                # gates without any binned value stay NaN instead of
                # dividing by zero
                if total:
                    bin_middle = (bins[1:] + bins[:-1]) / 2.
                    cfad[hgt, :, n] = 100. * (freq / float(total))
                    average[hgt, n] = np.sum(freq * bin_middle) / total
                if valid.size:
                    median[hgt, n] = np.percentile(valid, 50)
        return hist, cfad, average, median

    # per target: blocks for (all, tta, notta)
    stacks = {'wdir': ([], [], []), 'wspd': ([], [], [])}

    for y in year:
        print('Processing year {}'.format(y))

        # tta analysis
        tta = tta_analysis(y)
        tta.start_df(wdir_surf=wdsurf,
                     wdir_wprof=wdwpro,
                     rain_bby=rainbb,
                     rain_czd=raincz,
                     nhours=nhours)

        # read wprof
        wprof_df = parse_data.windprof(y)

        for t in ('wdir', 'wspd'):
            # wprof partition
            wprof = wprof_df.dframe[t].loc[tta.include_dates]
            parts = (wprof,
                     wprof.loc[tta.tta_dates],
                     wprof.loc[tta.notta_dates])
            for bucket, part in zip(stacks[t], parts):
                if part.size == 0:
                    continue
                block = np.squeeze(pandas2stack(part))
                if block.ndim == 1:
                    # a single hour squeezes to 1-D; keep it a column
                    block = np.expand_dims(block, axis=1)
                bucket.append(block)

    if not stacks['wspd'][0]:
        raise ValueError('no wind profiles left after the TTA screening')
    n_gates = stacks['wspd'][0][0].shape[0]

    def _join(blocks):
        ''' hstack the yearly blocks; an empty group has zero hours '''
        if blocks:
            return np.hstack(blocks)
        return np.empty((n_gates, 0))

    wsp, wsp_tta, wsp_notta = [_join(b) for b in stacks['wspd']]
    wdr, wdr_tta, wdr_notta = [_join(b) for b in stacks['wdir']]

    arrays = {}
    arrays['wdir'] = [wdr, wdr_tta, wdr_notta]
    arrays['wspd'] = [wsp, wsp_tta, wsp_notta]

    # wind components from speed and direction
    arrays['u'] = [-wsp * np.sin(np.radians(wdr)),
                   -wsp_tta * np.sin(np.radians(wdr_tta)),
                   -wsp_notta * np.sin(np.radians(wdr_notta))]
    arrays['v'] = [-wsp * np.cos(np.radians(wdr)),
                   -wsp_tta * np.cos(np.radians(wdr_tta)),
                   -wsp_notta * np.cos(np.radians(wdr_notta))]

    # total hours
    wp_hours = wsp.shape[1]
    tta_hours = wsp_tta.shape[1]
    notta_hours = wsp_notta.shape[1]

    # loop for variable (wdir, wspd, u, v)
    results = dict((k, _cfad(v, binss[k])) for k, v in arrays.items())
    hist_array_spd, cfad_array_spd, average_spd, median_spd = results['wspd']
    hist_array_dir, cfad_array_dir, average_dir, median_dir = results['wdir']
    hist_array_u, cfad_array_u, average_u, median_u = results['u']
    hist_array_v, cfad_array_v, average_v, median_v = results['v']

    return [hist_array_spd,
            hist_array_dir,
            hist_array_u,
            hist_array_v,
            cfad_array_spd,
            cfad_array_dir,
            cfad_array_u,
            cfad_array_v,
            binss['wspd'],
            binss['wdir'],
            binss['u'],
            binss['v'],
            wprof_df.hgt,
            wp_hours,
            tta_hours,
            notta_hours,
            average_spd,
            average_dir,
            average_u,
            average_v,
            median_spd,
            median_dir,
            median_u,
            median_v,
            ]
def preprocess(years=None, layer=None, verbose=True):
    ''' Collect wind direction / speed profiles (surface value prepended
        to the profiler gates) over several years and derive a
        layer-mean wind direction.

        years   -- iterable of years to read
        layer   -- [bottom, top]; gates with bottom <= hgt < top are
                   averaged
        verbose -- print the per-year count of hours with missing precip

        Returns a dict with keys WD, WS, WD_rain, WS_rain, wd_layer,
        precip (last year only) and precip_good (all years).

        NOTE(review): sin and cos are module-level helpers not visible
        here; calls such as sin(180) suggest they take degrees - confirm.
    '''

    import pandas as pd
    import parse_data

    WD = pd.Series(dtype=object)
    WS = pd.Series(dtype=object)
    WD_rain = pd.Series(dtype=object)
    WS_rain = pd.Series(dtype=object)
    precip_good = pd.DataFrame()

    for year in years:

        wpr = parse_data.windprof(year=year)
        bby = parse_data.surface('bby', year=year)
        czd = parse_data.surface('czd', year=year)

        hgt = wpr.hgt

        # find common time period
        first = max(bby.dframe.index[0],
                    czd.dframe.index[0],
                    wpr.dframe.index[0])
        last = min(bby.dframe.index[-1],
                   czd.dframe.index[-1],
                   wpr.dframe.index[-1])

        # reduce time interval so all start and end at same time
        wpr = wpr.dframe.loc[first:last]
        bby = bby.dframe.loc[first:last]
        czd = czd.dframe.loc[first:last]

        # append surface values to windprof to make entire profile;
        # next() works on both Python 2 and 3 iterators
        surf_wsp = iter(bby.wspd.values.tolist())
        surf_wdr = iter(bby.wdir.values.tolist())
        wsp = wpr.wspd.map(lambda x: [next(surf_wsp)] + x)
        wdr = wpr.wdir.map(lambda x: [next(surf_wdr)] + x)
        hgt = np.append([0], hgt)

        # check nans on precip
        precip = pd.concat([bby.precip, czd.precip], axis=1)
        precip.columns = ['bby', 'czd']
        precip_nans = precip.isnull().any(axis=1)
        precip_nans.name = 'precip_nan'
        tx = 'year:{}, any_precip_nan:{:4d}'
        if verbose:
            print(tx.format(year, precip_nans.sum()))

        # check entire profile nans (same for ws and wd)
        prof_nans = wsp.apply(lambda x: np.isnan(x).all())
        prof_nans.name = 'prof_nan'

        # include only hours with precip at both sites and with at
        # least one non-missing value in the profile
        nan_df = pd.concat([precip_nans, prof_nans], axis=1)
        any_nan = nan_df.any(axis=1)
        include = ~any_nan
        precip_good = pd.concat([precip_good, precip[include]])

        # hours with rain at CZD
        rain_czd = czd.precip > 0

        # reduce and save to big Series
        wdr = wdr[include]
        wsp = wsp[include]
        wdr_rain = wdr[rain_czd]
        wsp_rain = wsp[rain_czd]

        WD = pd.concat([WD, wdr])
        WS = pd.concat([WS, wsp])
        WD_rain = pd.concat([WD_rain, wdr_rain])
        WS_rain = pd.concat([WS_rain, wsp_rain])

    # compute components
    WD_sin = WD.apply(lambda x: sin(x))
    WD_cos = WD.apply(lambda x: cos(x))
    U_df = -1 * WS.multiply(WD_sin)
    V_df = -1 * WS.multiply(WD_cos)
    wind_flow_180 = -(U_df * sin(180) + V_df * cos(180))
    wind_flow_90 = U_df * sin(90) + V_df * cos(90)

    # layer-mean
    layer_idx = np.where((hgt >= layer[0]) & (hgt < layer[1]))[0]
    mean_V = wind_flow_180.apply(lambda x: np.nanmean(x[layer_idx]))
    mean_U = wind_flow_90.apply(lambda x: np.nanmean(x[layer_idx]))
    wd_layer = 270 - (np.arctan2(mean_V, mean_U) * 180 / np.pi)
    wd_layer[wd_layer > 360] -= 360
    wd_layer.name = '{:2.0f}-{:2.0f}m'.format(hgt[layer_idx[0]],
                                              hgt[layer_idx[-1]])

    return dict(WD=WD, WS=WS, WD_rain=WD_rain, WS_rain=WS_rain,
                wd_layer=wd_layer, precip=precip, precip_good=precip_good)
def plot_with_hist(year=None, target=None, normalized=True, pngsuffix=None):
    ''' Plot a wind profiler time-height section (left) next to a
        per-gate histogram of the same variable (right).

        year       -- passed unchanged to parse_data.windprof
        target     -- 'wdir' or 'wspd'
        normalized -- forwarded to np.histogram as `density`
        pngsuffix  -- when given, the figure is saved as
                      'wprof_<target>_<pngsuffix>.png' instead of shown

        Raises ValueError for any other target, before a figure is
        created.
    '''

    settings = {
        'wdir': dict(label='Wind Direction', vlim=(0, 360),
                     bins=np.arange(0, 370, 10),
                     xticks=np.arange(0, 400, 40), xlim=[0, 360]),
        'wspd': dict(label='Wind Speed', vlim=(0, 30),
                     bins=np.arange(0, 36, 1),
                     xticks=np.arange(0, 40, 5), xlim=[0, 35]),
    }
    if target not in settings:
        raise ValueError(
            "target must be 'wdir' or 'wspd', got {!r}".format(target))
    cfg = settings[target]
    vmin, vmax = cfg['vlim']
    edges = cfg['bins']

    plt.figure(figsize=(20, 5))
    gs = gsp.GridSpec(1, 2, width_ratios=[3, 1])
    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    wprof = parse_data.windprof(year)
    wp = np.squeeze(pandas2stack(wprof.dframe[target]))
    wp_ma = ma.masked_where(np.isnan(wp), wp)
    X, Y = wprof.time, wprof.hgt
    p = ax1.pcolormesh(X, Y, wp_ma, vmin=vmin, vmax=vmax)
    add_colorbar(ax1, p)
    ax1.xaxis.set_major_locator(mdates.MonthLocator())
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    ax1.set_xlabel(r'$ Time \rightarrow$')
    ax1.set_ylabel('Altitude [m] MSL')
    ax1.set_title('BBY Windprof ' + cfg['label'])

    # one histogram row per gate actually present in the data
    array = np.empty((wp.shape[0], len(edges) - 1))
    for hgt in range(wp.shape[0]):
        row = wp[hgt, :]
        freq, _ = np.histogram(row[~np.isnan(row)],
                               bins=edges,
                               density=normalized)
        array[hgt, :] = freq

    p = ax2.pcolormesh(edges, wprof.hgt, array, cmap='viridis')
    # a gate without data gives NaN densities; ignore them for the ticks
    amin = np.nanmin(array)
    amax = np.nanmax(array)
    cbar = add_colorbar(ax2, p, size='4%', ticks=[amin, amax])
    cbar.ax.set_yticklabels(['low', 'high'])
    ax2.set_xticks(cfg['xticks'])
    ax2.set_yticklabels('')
    ax2.set_xlabel(cfg['label'])
    ax2.set_xlim(cfg['xlim'])
    ax2.set_title('Normalized frequency' if normalized else 'Frequency')

    plt.tight_layout()
    if pngsuffix:
        out_name = 'wprof_{}_{}.png'
        plt.savefig(out_name.format(target, pngsuffix))
    else:
        plt.show(block=False)
else: tb = t.text + ' | ' + t.text if end_czd > end_bby: te = t.red() + ' | ' + t.text else: te = t.text + ' | ' + t.text txtDate = tb + ' || ' + te fmt = '%Y-%m-%d %H:%M' b_bby = beg_bby.strftime(fmt) b_czd = beg_czd.strftime(fmt) e_bby = end_bby.strftime(fmt) e_czd = end_czd.strftime(fmt) print txtDate.format(b_bby, b_czd, e_czd, e_bby) print '\nBBY windprof dates' txtHeader = '{:^16} | {:^16}' print txtHeader.format('Beg', 'End') for y in [1998] + range(2001, 2013): wprof = parse_data.windprof(y) beg, end = wprof.check_beg_end() txtDate = '{} | {}' fmt = '%Y-%m-%d %H:%M' b = beg.strftime(fmt) e = end.strftime(fmt) print txtDate.format(b, e)
def start_df(self, wdir_surf=None, wdir_wprof=None,
             rain_bby=None, rain_czd=None, nhours=None):
    ''' this version uses pandas dataframe, it should be
        more accurate and simpler

        Evaluate the TTA condition for every hour common to the BBY
        and CZD surface data and the wind profiler of self.year.

        wdir_surf  -- surface wind direction must be <= this value
        wdir_wprof -- first-gate profiler direction must be <= this value
        rain_bby   -- optional minimum hourly precip at BBY
        rain_czd   -- optional minimum hourly precip at CZD
        nhours     -- minimum run of consecutive hours meeting the
                      condition for them to be flagged as TTA

        Stores the hourly dataframe in self.df and the rainfall totals,
        hour counts and date indexes in the other self.* attributes
        assigned at the end.

        Raises TypeError when nhours is None.
    '''

    import pandas as pd

    if nhours is None:
        raise TypeError('nhours (minimum run length in hours) is required')

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    # trim the head and tail of dataset depending
    # on the latest time of the beginning and
    # earliest of the ending
    time_beg = max(beg_bby, beg_czd, beg_wpr)
    time_end = min(end_bby, end_czd, end_wpr)

    # initializations
    onehr = timedelta(hours=1)
    rng = pd.date_range(start=time_beg, end=time_end, freq='1H')
    cols = ('wssrf', 'wswpr', 'wdsrf', 'wdwpr', 'rbby', 'rczd',
            'tta', 'consecutive')
    df = pd.DataFrame(index=rng, columns=cols)

    flags = []  # one entry per hour: part of a run >= nhours
    run = 0     # length of the current run of TTA hours

    # loop evaluates each time
    time = time_beg
    while time <= time_end:

        surf_wd = bby.dframe.loc[time].wdir
        wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
        pbby = bby.dframe.loc[time].precip
        pczd = czd.dframe.loc[time].precip

        if surf_wd is None:
            surf_wd = np.nan

        # check conditions
        conds = [surf_wd <= wdir_surf, wpr_wd0 <= wdir_wprof]
        if rain_bby:
            conds.append(pbby >= rain_bby)
        if rain_czd:
            conds.append(pczd >= rain_czd)
        tta_condition = all(conds)

        # a single .loc call writes into the frame itself; the chained
        # df.loc[time].col = value form may write to a temporary copy
        df.loc[time, 'wdsrf'] = surf_wd
        df.loc[time, 'wdwpr'] = wpr_wd0
        df.loc[time, 'rbby'] = pbby
        df.loc[time, 'rczd'] = pczd
        df.loc[time, 'wssrf'] = bby.dframe.loc[time].wspd
        df.loc[time, 'wswpr'] = wprof.dframe.loc[time].wspd[0]
        df.loc[time, 'tta'] = tta_condition

        # construct boolean array indicating hourly TTA conditions
        # with minimum of nhours
        if tta_condition:
            run += 1
            if run == nhours:
                flags.extend([True] * nhours)
            elif run > nhours:
                flags.append(True)
        else:
            if 0 < run < nhours:
                flags.extend([False] * run)
            flags.append(False)
            run = 0

        time += onehr

    # a run shorter than nhours that reaches the end of the record was
    # never written out; flush it so flags has one entry per hour
    if 0 < run < nhours:
        flags.extend([False] * run)

    df['consecutive'] = np.array(flags, dtype=bool)

    ar_wdsrf = df.wdsrf.values.astype(float)
    ar_wdwpr = df.wdwpr.values.astype(float)
    ar_rbby = df.rbby.values.astype(float)
    ar_rczd = df.rczd.values.astype(float)

    exclude = (np.isnan(ar_wdsrf) | np.isnan(ar_wdwpr) |
               np.isnan(ar_rbby) | np.isnan(ar_rczd))
    if rain_czd is not None and rain_czd >= 0.25:
        # this boolean excludes dates when there is no precip at CZD
        exclude = exclude | (ar_rczd == 0)

    tot_rbby = np.round(df.rbby.sum(), 0).astype(int)
    tot_rczd = np.round(df.rczd.sum(), 0).astype(int)
    exc_rbby = np.round(df[exclude].rbby.sum(), 0).astype(int)
    exc_rczd = np.round(df[exclude].rczd.sum(), 0).astype(int)
    inc_rbby = tot_rbby - exc_rbby
    inc_rczd = tot_rczd - exc_rczd
    tot_hrs = np.round(df.index.size, 0).astype(int)
    exc_hours = np.round(exclude.sum(), 0).astype(int)
    inc_hours = tot_hrs - exc_hours

    tta_rbby = np.round(df[df.consecutive].rbby.sum(), 0).astype(int)
    tta_rczd = np.round(df[df.consecutive].rczd.sum(), 0).astype(int)
    notta_rbby = inc_rbby - tta_rbby
    notta_rczd = inc_rczd - tta_rczd

    exclude_dates = df[exclude].index
    include_dates = df[~exclude].index
    tta_dates = df[~exclude & df.consecutive].index
    notta_dates = df[~exclude & ~df.consecutive].index

    self.time_beg = time_beg
    self.time_end = time_end
    self.count_hrs_include = inc_hours
    self.count_hrs_exclude = exc_hours
    self.tot_rainfall_bby = tot_rbby
    self.tot_rainfall_czd = tot_rczd
    self.inc_rainfall_bby = inc_rbby
    self.inc_rainfall_czd = inc_rczd
    self.exc_rainfall_bby = exc_rbby
    self.exc_rainfall_czd = exc_rczd
    self.tta_rainfall_bby = tta_rbby
    self.tta_rainfall_czd = tta_rczd
    self.notta_rainfall_bby = notta_rbby
    self.notta_rainfall_czd = notta_rczd
    self.tta_hours = tta_dates.size
    self.notta_hours = notta_dates.size
    self.wprof_hgt = wprof.hgt
    self.exclude_dates = exclude_dates
    self.include_dates = include_dates
    self.tta_dates = tta_dates
    self.notta_dates = notta_dates
    self.df = df
def start(self, wdir_surf=None, wdir_wprof=None,
          rain_bby=None, rain_czd=None, nhours=None):
    ''' this is an old version; prefer start_df, which uses a
        pandas dataframe for the analysis

        Walk hour by hour through the period common to the BBY and CZD
        surface data and the wind profiler of self.year, skip hours
        with missing surface or first-gate wind direction, and flag
        runs of at least nhours consecutive hours that meet the TTA
        condition.

        wdir_surf  -- surface wind direction must be <= this value
        wdir_wprof -- first-gate profiler direction must be <= this value
        rain_bby   -- optional minimum hourly precip at BBY
        rain_czd   -- optional minimum hourly precip at CZD
        nhours     -- minimum run length in hours

        Results are stored in the self.* attributes assigned at the
        end.  Raises TypeError when nhours is None.
    '''

    if nhours is None:
        raise TypeError('nhours (minimum run length in hours) is required')

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    # the latest of the beg
    time_beg = max(beg_bby, beg_czd, beg_wpr)

    # the earliest of the end
    time_end = min(end_bby, end_czd, end_wpr)

    # rainfall before all obs start
    rbby_before = np.nansum(bby.dframe.loc[:time_beg].precip)
    rczd_before = np.nansum(czd.dframe.loc[:time_beg].precip)

    # rainfall after all obs end
    rbby_after = np.nansum(bby.dframe.loc[time_end:].precip)
    rczd_after = np.nansum(czd.dframe.loc[time_end:].precip)

    # number of windprofiles before (after) all obs start (end)
    nwprof_before = len(wprof.dframe.loc[:time_beg].wdir)
    nwprof_after = len(wprof.dframe.loc[time_end:].wdir)

    onehr = timedelta(hours=1)
    flags = []          # one entry per included hour
    included_bby = []   # precip of the included hours
    included_czd = []
    run = 0             # length of the current run of TTA hours
    count_while = 0
    count_exclude = 0

    time = time_beg
    while time <= time_end:

        surf_wd = bby.dframe.loc[time].wdir
        wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
        pbby = bby.dframe.loc[time].precip
        pczd = czd.dframe.loc[time].precip

        # exclude data when there is nan in surf obs
        # or windprof first gate
        if surf_wd is None or np.isnan(surf_wd) or np.isnan(wpr_wd0):
            count_exclude += 1
            time += onehr
            continue

        # these are obs included in the analysis, then we
        # determine if they are tta or no-tta
        included_bby.append(pbby)
        included_czd.append(pczd)

        # check conditions
        conds = [surf_wd <= wdir_surf, wpr_wd0 <= wdir_wprof]
        if rain_bby:
            conds.append(pbby >= rain_bby)
        if rain_czd:
            conds.append(pczd >= rain_czd)
        tta_condition = all(conds)

        # construct boolean array indicating hourly TTA conditions
        # with minimum of nhours
        if tta_condition:
            run += 1
            if run == nhours:
                flags.extend([True] * nhours)
            elif run > nhours:
                flags.append(True)
        else:
            if 0 < run < nhours:
                flags.extend([False] * run)
            flags.append(False)
            run = 0

        count_while += 1
        time += onehr

    # a run shorter than nhours that reaches the end of the record was
    # never written out; flush it so the mask matches the rainfall
    # arrays in length
    if 0 < run < nhours:
        flags.extend([False] * run)

    tta_bool = np.array(flags, dtype=bool)
    rainfall_bby = np.array(included_bby, dtype=float)
    rainfall_czd = np.array(included_czd, dtype=float)
    tta_hours = tta_bool.sum()
    notta_hours = count_while - tta_hours

    self.tta_hours = tta_hours
    self.notta_hours = notta_hours
    self.time_beg = time_beg
    self.time_end = time_end
    self.count_while = count_while
    self.count_exclude = count_exclude
    self.total_rainfall_bby = np.nansum(rainfall_bby)
    self.total_rainfall_czd = np.nansum(rainfall_czd)
    self.bool = tta_bool
    self.tta_rainfall_czd = np.nansum(rainfall_czd[tta_bool])
    self.tta_rainfall_bby = np.nansum(rainfall_bby[tta_bool])
    self.notta_rainfall_czd = np.nansum(rainfall_czd[~tta_bool])
    self.notta_rainfall_bby = np.nansum(rainfall_bby[~tta_bool])
    self.rainfall_bby_before_analysis = rbby_before
    self.rainfall_bby_after_analysis = rbby_after
    self.rainfall_czd_before_analysis = rczd_before
    self.rainfall_czd_after_analysis = rczd_after
    self.nwprof_before = nwprof_before
    self.nwprof_after = nwprof_after
    self.wprof_hgt = wprof.hgt

    print('TTA analysis finished')
def start_df_layer(self, wdir_thres=None,
                   wdir_layer=[None, None],  # [meters]
                   rain_bby=None,
                   rain_czd=None,
                   nhours=None):
    ''' this version uses pandas dataframe similar to
        start_df but uses a layer instead of a level

        wdir_thres -- threshold for the layer-mean wind direction: a
                      number means "mean < value", a string is handed
                      to parse_operator together with the mean
        wdir_layer -- [bottom, top] in meters; gates with
                      bottom <= hgt < top are averaged, and the BBY
                      surface wind is added when bottom == 0 (the
                      default list is never mutated)
        rain_bby   -- optional minimum hourly precip at BBY
        rain_czd   -- optional minimum hourly precip at CZD
        nhours     -- minimum run of consecutive hours meeting the
                      condition for them to be flagged as TTA

        Stores the hourly dataframe in self.df and the rainfall totals,
        hour counts and date indexes in the other self.* attributes
        assigned at the end.

        Raises TypeError when nhours is None and ValueError when no
        criterion at all is given.
    '''

    import pandas as pd

    if nhours is None:
        raise TypeError('nhours (minimum run length in hours) is required')
    if not (wdir_thres or rain_bby or rain_czd):
        raise ValueError('at least one of wdir_thres, rain_bby, '
                         'rain_czd is required')

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    # trim the head and tail of dataset depending
    # on the latest time of the beginning and
    # earliest of the ending
    time_beg = max(beg_bby, beg_czd, beg_wpr)
    time_end = min(end_bby, end_czd, end_wpr)

    # initializations
    onehr = timedelta(hours=1)
    rng = pd.date_range(start=time_beg, end=time_end, freq='1H')

    idx = np.where((wprof.hgt >= wdir_layer[0]) &
                   (wprof.hgt < wdir_layer[1]))[0]
    wphgt = wprof.hgt[idx]
    use_surface = (wdir_layer[0] == 0)

    # columns included in the dataframe
    wdircol = 'wd_{}-{:2.0f}m'.format(wdir_layer[0], wphgt[-1])
    cols = [wdircol, 'rbby', 'rczd', 'tta', 'consecutive']

    # create dataframe
    df = pd.DataFrame(index=rng, columns=cols)

    flags = []  # one entry per hour: part of a run >= nhours
    run = 0     # length of the current run of TTA hours

    # loop evaluates each time
    time = time_beg
    while time <= time_end:

        if use_surface:
            surf_wd = np.array(bby.dframe.loc[time].wdir)
            surf_ws = np.array(bby.dframe.loc[time].wspd)
        else:
            surf_wd = np.array([])
            surf_ws = np.array([])

        wpro_wd = np.array(wprof.dframe.loc[time].wdir)[idx]
        wpro_ws = np.array(wprof.dframe.loc[time].wspd)[idx]

        wd = np.append(surf_wd, wpro_wd)
        ws = np.append(surf_ws, wpro_ws)

        # vector-mean direction of the layer
        u = -ws * np.sin(np.radians(wd))
        v = -ws * np.cos(np.radians(wd))
        wd_mean = 270 - np.arctan2(v.mean(), u.mean()) * 180. / np.pi
        if wd_mean > 360:
            wd_mean -= 360

        pbby = bby.dframe.loc[time].precip
        pczd = czd.dframe.loc[time].precip

        # check conditions; every criterion that was given must hold
        conds = []
        if wdir_thres:
            if isinstance(wdir_thres, str):
                conds.append(parse_operator(wd_mean, wdir_thres))
            else:
                conds.append(wd_mean < wdir_thres)
        if rain_czd:
            conds.append(pczd >= rain_czd)
        if rain_bby:
            conds.append(pbby >= rain_bby)
        tta_condition = all(conds)

        # a single .loc call writes into the frame itself; the chained
        # df.loc[time].col = value form may write to a temporary copy
        df.loc[time, wdircol] = wd_mean
        df.loc[time, 'rbby'] = pbby
        df.loc[time, 'rczd'] = pczd
        df.loc[time, 'tta'] = tta_condition

        # construct boolean array indicating hourly TTA conditions
        # with minimum of nhours
        if tta_condition:
            run += 1
            if run == nhours:
                flags.extend([True] * nhours)
            elif run > nhours:
                flags.append(True)
        else:
            if 0 < run < nhours:
                flags.extend([False] * run)
            flags.append(False)
            run = 0

        # the loop only terminates if time advances every iteration
        time += onehr

    # a run shorter than nhours that reaches the end of the record was
    # never written out; flush it so flags has one entry per hour
    if 0 < run < nhours:
        flags.extend([False] * run)

    df['consecutive'] = np.array(flags, dtype=bool)

    ar_wdir = df[wdircol].values.astype(float)
    ar_rbby = df.rbby.values.astype(float)
    ar_rczd = df.rczd.values.astype(float)

    exclude = np.isnan(ar_wdir) | np.isnan(ar_rbby) | np.isnan(ar_rczd)
    if rain_czd is not None and rain_czd >= 0.25:
        # this boolean excludes dates when there is no precip at CZD
        exclude = exclude | (ar_rczd == 0)

    tot_rbby = np.round(df.rbby.sum(), 3)
    tot_rczd = np.round(df.rczd.sum(), 3)
    exc_rbby = np.round(df[exclude].rbby.sum(), 3)
    exc_rczd = np.round(df[exclude].rczd.sum(), 3)
    inc_rbby = tot_rbby - exc_rbby
    inc_rczd = tot_rczd - exc_rczd
    tot_hrs = np.round(df.index.size, 0).astype(int)
    exc_hours = np.round(exclude.sum(), 0).astype(int)
    inc_hours = tot_hrs - exc_hours

    tta_rbby = np.round(df[df.consecutive].rbby.sum(), 3)
    tta_rczd = np.round(df[df.consecutive].rczd.sum(), 3)
    notta_rbby = inc_rbby - tta_rbby
    notta_rczd = inc_rczd - tta_rczd

    exclude_dates = df[exclude].index
    include_dates = df[~exclude].index
    tta_dates = df[~exclude & df.consecutive].index
    notta_dates = df[~exclude & ~df.consecutive].index

    self.time_beg = time_beg
    self.time_end = time_end
    self.count_hrs_include = inc_hours
    self.count_hrs_exclude = exc_hours
    self.tot_rainfall_bby = tot_rbby
    self.tot_rainfall_czd = tot_rczd
    self.inc_rainfall_bby = inc_rbby
    self.inc_rainfall_czd = inc_rczd
    self.exc_rainfall_bby = exc_rbby
    self.exc_rainfall_czd = exc_rczd
    self.tta_rainfall_bby = tta_rbby
    self.tta_rainfall_czd = tta_rczd
    self.notta_rainfall_bby = notta_rbby
    self.notta_rainfall_czd = notta_rczd
    self.tta_hours = tta_dates.size
    self.notta_hours = notta_dates.size
    self.wprof_hgt = wprof.hgt
    self.exclude_dates = exclude_dates
    self.include_dates = include_dates
    self.tta_dates = tta_dates
    self.notta_dates = notta_dates
    self.df = df
def start_df(self, wdir_surf=None,
             wdir_wprof=None,
             wprof_gate=0,
             rain_bby=None,
             rain_czd=None,
             nhours=None):
    ''' this version uses pandas dataframe, it should be
        more accurate and simpler than start method

        wdir_surf  -- threshold for the BBY surface wind direction: a
                      number means "wdir <= value", a string is handed
                      to parse_operator together with the direction
        wdir_wprof -- same, for the profiler direction at wprof_gate
        wprof_gate -- index of the profiler gate to test
        rain_bby   -- optional minimum hourly precip at BBY
        rain_czd   -- optional minimum hourly precip at CZD
        nhours     -- minimum run of consecutive hours meeting the
                      condition for them to be flagged as TTA

        Every criterion that is given must hold for an hour to meet
        the condition, in any combination.  Stores the hourly
        dataframe in self.df and the rainfall totals, hour counts and
        date indexes in the other self.* attributes assigned at the
        end.

        Raises TypeError when nhours is None and ValueError when no
        criterion at all is given.
    '''

    import pandas as pd

    if nhours is None:
        raise TypeError('nhours (minimum run length in hours) is required')
    if not (wdir_surf or wdir_wprof or rain_bby or rain_czd):
        raise ValueError('at least one of wdir_surf, wdir_wprof, '
                         'rain_bby, rain_czd is required')

    def _wdir_ok(value, thres):
        ''' string thresholds carry their own operator,
            numeric ones mean "<=" '''
        if isinstance(thres, str):
            return parse_operator(value, thres)
        return value <= thres

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    # trim the head and tail of dataset depending
    # on the latest time of the beginning and
    # earliest of the ending
    time_beg = max(beg_bby, beg_czd, beg_wpr)
    time_end = min(end_bby, end_czd, end_wpr)

    # initializations
    onehr = timedelta(hours=1)
    rng = pd.date_range(start=time_beg, end=time_end, freq='1H')

    # columns included in the dataframe
    wprofcol = 'wdwpr{}'.format(wprof_gate)
    cols = ['wdsrf', wprofcol, 'rbby', 'rczd', 'tta', 'consecutive']

    # create dataframe
    df = pd.DataFrame(index=rng, columns=cols)

    flags = []  # one entry per hour: part of a run >= nhours
    run = 0     # length of the current run of TTA hours

    # loop evaluates each time
    time = time_beg
    while time <= time_end:

        surf_wd = bby.dframe.loc[time].wdir
        wpr_wd0 = wprof.dframe.loc[time].wdir[wprof_gate]
        pbby = bby.dframe.loc[time].precip
        pczd = czd.dframe.loc[time].precip

        # check conditions; every criterion that was given must hold
        conds = []
        if wdir_surf:
            conds.append(_wdir_ok(surf_wd, wdir_surf))
        if wdir_wprof:
            conds.append(_wdir_ok(wpr_wd0, wdir_wprof))
        if rain_czd:
            conds.append(pczd >= rain_czd)
        if rain_bby:
            conds.append(pbby >= rain_bby)
        tta_condition = all(conds)

        # a single .loc call writes into the frame itself; the chained
        # df.loc[time].col = value form may write to a temporary copy
        df.loc[time, 'wdsrf'] = surf_wd
        df.loc[time, wprofcol] = wpr_wd0
        df.loc[time, 'rbby'] = pbby
        df.loc[time, 'rczd'] = pczd
        df.loc[time, 'tta'] = tta_condition

        # construct boolean array indicating hourly TTA conditions
        # with minimum of nhours
        if tta_condition:
            run += 1
            if run == nhours:
                flags.extend([True] * nhours)
            elif run > nhours:
                flags.append(True)
        else:
            if 0 < run < nhours:
                flags.extend([False] * run)
            flags.append(False)
            run = 0

        time += onehr

    # a run shorter than nhours that reaches the end of the record was
    # never written out; flush it so flags has one entry per hour
    if 0 < run < nhours:
        flags.extend([False] * run)

    df['consecutive'] = np.array(flags, dtype=bool)

    ar_wdsrf = df.wdsrf.values.astype(float)
    ar_wdwpr = df[wprofcol].values.astype(float)
    ar_rbby = df.rbby.values.astype(float)
    ar_rczd = df.rczd.values.astype(float)

    exclude = (np.isnan(ar_wdsrf) | np.isnan(ar_wdwpr) |
               np.isnan(ar_rbby) | np.isnan(ar_rczd))
    if rain_czd is not None and rain_czd >= 0.25:
        # this boolean excludes dates when there is no precip at CZD
        exclude = exclude | (ar_rczd == 0)

    tot_rbby = np.round(df.rbby.sum(), 3)
    tot_rczd = np.round(df.rczd.sum(), 3)
    exc_rbby = np.round(df[exclude].rbby.sum(), 3)
    exc_rczd = np.round(df[exclude].rczd.sum(), 3)
    inc_rbby = tot_rbby - exc_rbby
    inc_rczd = tot_rczd - exc_rczd
    tot_hrs = np.round(df.index.size, 0).astype(int)
    exc_hours = np.round(exclude.sum(), 0).astype(int)
    inc_hours = tot_hrs - exc_hours

    tta_rbby = np.round(df[df.consecutive].rbby.sum(), 3)
    tta_rczd = np.round(df[df.consecutive].rczd.sum(), 3)
    notta_rbby = inc_rbby - tta_rbby
    notta_rczd = inc_rczd - tta_rczd

    exclude_dates = df[exclude].index
    include_dates = df[~exclude].index
    tta_dates = df[~exclude & df.consecutive].index
    notta_dates = df[~exclude & ~df.consecutive].index

    self.time_beg = time_beg
    self.time_end = time_end
    self.count_hrs_include = inc_hours
    self.count_hrs_exclude = exc_hours
    self.tot_rainfall_bby = tot_rbby
    self.tot_rainfall_czd = tot_rczd
    self.inc_rainfall_bby = inc_rbby
    self.inc_rainfall_czd = inc_rczd
    self.exc_rainfall_bby = exc_rbby
    self.exc_rainfall_czd = exc_rczd
    self.tta_rainfall_bby = tta_rbby
    self.tta_rainfall_czd = tta_rczd
    self.notta_rainfall_bby = notta_rbby
    self.notta_rainfall_czd = notta_rczd
    self.tta_hours = tta_dates.size
    self.notta_hours = notta_dates.size
    self.wprof_hgt = wprof.hgt
    self.exclude_dates = exclude_dates
    self.include_dates = include_dates
    self.tta_dates = tta_dates
    self.notta_dates = notta_dates
    self.df = df
def plot_with_hist(year=None, target=None, normalized=True, pngsuffix=None):
    """Plot a wind-profiler time-height section beside its per-gate histogram.

    The left panel is a pcolormesh of the selected variable over time and
    height; the right panel shows, for every height gate, the histogram of
    the non-NaN values observed at that gate.

    Parameters
    ----------
    year :
        Passed unchanged to ``parse_data.windprof``.
    target : {'wdir', 'wspd'}
        Column of the profiler dataframe to plot.
    normalized : bool
        Forwarded as ``density`` to ``np.histogram``.
    pngsuffix : str or None
        When truthy the figure is saved as ``wprof_<target>_<pngsuffix>.png``;
        otherwise it is shown without blocking.

    Raises
    ------
    ValueError
        If ``target`` is not one of the supported variables.
    """
    name = {'wdir': 'Wind Direction',
            'wspd': 'Wind Speed'}

    if target == 'wdir':
        vmin, vmax = [0, 360]
        bins = np.arange(0, 370, 10)
        hist_xticks = np.arange(0, 400, 40)
        hist_xlim = [0, 360]
    elif target == 'wspd':
        vmin, vmax = [0, 30]
        bins = np.arange(0, 36, 1)
        hist_xticks = np.arange(0, 40, 5)
        hist_xlim = [0, 35]
    else:
        # fail before opening a figure or parsing any data
        raise ValueError(
            "target must be 'wdir' or 'wspd', got {!r}".format(target))

    plt.figure(figsize=(20, 5))
    gs = gsp.GridSpec(1, 2, width_ratios=[3, 1])
    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    # --- time-height section ---
    wprof = parse_data.windprof(year)
    # presumably pandas2stack returns a (gate, time) array -- TODO confirm
    wp = np.squeeze(pandas2stack(wprof.dframe[target]))
    wp_ma = ma.masked_where(np.isnan(wp), wp)
    X, Y = wprof.time, wprof.hgt
    p = ax1.pcolormesh(X, Y, wp_ma, vmin=vmin, vmax=vmax)
    add_colorbar(ax1, p)
    ax1.xaxis.set_major_locator(mdates.MonthLocator())
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    ax1.set_xlabel(r'$ Time \rightarrow$')
    ax1.set_ylabel('Altitude [m] MSL')
    ax1.set_title('BBY Windprof ' + name[target])

    # --- histogram per height gate ---
    # size the array from the data instead of assuming 40 gates
    n_gates = wp.shape[0]
    array = np.empty((n_gates, len(bins) - 1))
    for hgt in range(n_gates):
        row = wp[hgt, :]
        freq, bins = np.histogram(row[~np.isnan(row)],
                                  bins=bins,
                                  density=normalized)
        array[hgt, :] = freq

    x = bins
    y = wprof.hgt
    p = ax2.pcolormesh(x, y, array, cmap='viridis')
    amin = np.amin(array)
    amax = np.amax(array)
    cbar = add_colorbar(ax2, p, size='4%', ticks=[amin, amax])
    cbar.ax.set_yticklabels(['low', 'high'])
    ax2.set_xticks(hist_xticks)
    ax2.set_yticklabels('')
    ax2.set_xlabel(name[target])
    ax2.set_xlim(hist_xlim)
    ax2.set_title('Normalized frequency')

    plt.tight_layout()
    if pngsuffix:
        out_name = 'wprof_{}_{}.png'
        plt.savefig(out_name.format(target, pngsuffix))
    else:
        plt.show(block=False)
def preprocess(years=None, layer=None, verbose=True):
    """Gather hourly wind profiles for several years and a layer-mean direction.

    For every year the profiler, BBY-surface and CZD-surface records are cut
    to their common time span, the BBY surface wind is prepended to each
    profile (as a gate at height 0), and hours with missing precipitation at
    either station or with an all-NaN profile are dropped.

    Parameters
    ----------
    years : iterable
        Years to process; each is passed to ``parse_data``.
    layer : sequence of two numbers
        ``(bottom, top)``; gates with ``bottom <= hgt < top`` form the layer.
    verbose : bool
        Print the number of hours with missing precipitation per year.

    Returns
    -------
    dict
        ``WD``/``WS``: direction/speed profiles of the retained hours;
        ``WD_rain``/``WS_rain``: the subset with CZD precipitation > 0;
        ``wd_layer``: layer-mean wind direction;
        ``precip``: precipitation frame of the LAST processed year only
        (NOTE(review): kept as in the original -- confirm this is intended);
        ``precip_good``: precipitation of all retained hours.

    Raises
    ------
    ValueError
        If ``years`` is empty, ``layer`` is missing, or no gate lies in
        ``layer``.
    """
    years = [] if years is None else list(years)
    if not years:
        raise ValueError('years must contain at least one year')
    if layer is None:
        raise ValueError('layer must be a (bottom, top) pair of heights')

    import pandas as pd
    import parse_data

    def _stack(parts):
        # Concatenate once (Series.append is gone in recent pandas and was
        # quadratic in a loop); clear the name like the append chain did.
        out = pd.concat(parts)
        out.name = None
        return out

    wd_parts, ws_parts = [], []
    wd_rain_parts, ws_rain_parts = [], []
    precip_parts = []

    for year in years:
        wpr = parse_data.windprof(year=year)
        bby = parse_data.surface('bby', year=year)
        czd = parse_data.surface('czd', year=year)
        hgt = wpr.hgt

        # find common time period
        first = max(bby.dframe.index[0],
                    czd.dframe.index[0],
                    wpr.dframe.index[0])
        last = min(bby.dframe.index[-1],
                   czd.dframe.index[-1],
                   wpr.dframe.index[-1])

        # reduce time interval so all start and end at same time
        wpr = wpr.dframe.loc[first:last]
        bby = bby.dframe.loc[first:last]
        czd = czd.dframe.loc[first:last]

        # append surface values to windprof to make entire profile;
        # the builtin next() works on both Python 2 and 3
        surf_wsp = iter(bby.wspd.values.tolist())
        surf_wdr = iter(bby.wdir.values.tolist())
        wsp = wpr.wspd.map(lambda x: [next(surf_wsp)] + x)
        wdr = wpr.wdir.map(lambda x: [next(surf_wdr)] + x)
        hgt = np.append([0], hgt)

        # check nans on precip
        precip = pd.concat([bby.precip, czd.precip], axis=1)
        precip.columns = ['bby', 'czd']
        precip_nans = precip.isnull().any(axis=1)
        precip_nans.name = 'precip_nan'
        if verbose:
            tx = 'year:{}, any_precip_nan:{:4d}'
            print(tx.format(year, precip_nans.sum()))

        # check entire profile nans (same for ws and wd)
        prof_nans = wsp.apply(lambda x: np.isnan(x).all())
        prof_nans.name = 'prof_nan'

        # include only hours when surface precip is non-missing and the
        # profile has at least one non-missing value
        nan_df = pd.concat([precip_nans, prof_nans], axis=1)
        include = ~nan_df.any(axis=1)

        precip_parts.append(precip[include])

        # rainy hours at CZD
        rain_czd = czd.precip > 0

        # reduce and keep for the final concatenation
        wdr = wdr[include]
        wsp = wsp[include]
        wd_parts.append(wdr)
        ws_parts.append(wsp)
        wd_rain_parts.append(wdr[rain_czd])
        ws_rain_parts.append(wsp[rain_czd])

    WD = _stack(wd_parts)
    WS = _stack(ws_parts)
    WD_rain = _stack(wd_rain_parts)
    WS_rain = _stack(ws_rain_parts)
    precip_good = pd.concat(precip_parts)

    # compute components
    # NOTE(review): sin/cos come from module scope and are called with
    # degree values on whole profiles -- presumably degree-aware helpers.
    WD_sin = WD.apply(lambda x: sin(x))
    WD_cos = WD.apply(lambda x: cos(x))
    U_df = -1 * WS.multiply(WD_sin)
    V_df = -1 * WS.multiply(WD_cos)
    wind_flow_180 = -(U_df * sin(180) + V_df * cos(180))
    wind_flow_90 = U_df * sin(90) + V_df * cos(90)

    # layer-mean (hgt is the height vector of the last processed year)
    layer_idx = np.where((hgt >= layer[0]) & (hgt < layer[1]))[0]
    if layer_idx.size == 0:
        raise ValueError(
            'no height gate lies inside layer {!r}'.format(layer))
    mean_V = wind_flow_180.apply(lambda x: np.nanmean(x[layer_idx]))
    mean_U = wind_flow_90.apply(lambda x: np.nanmean(x[layer_idx]))
    wd_layer = 270 - (np.arctan2(mean_V, mean_U) * 180 / np.pi)
    wd_layer[wd_layer > 360] -= 360
    wd_layer.name = '{:2.0f}-{:2.0f}m'.format(hgt[layer_idx[0]],
                                              hgt[layer_idx[-1]])

    return dict(WD=WD, WS=WS, WD_rain=WD_rain, WS_rain=WS_rain,
                wd_layer=wd_layer, precip=precip, precip_good=precip_good)
# Build WS/WD only when WS is not yet defined in the namespace, so that
# re-running the script in the same session skips this section.
# NOTE(review): the original indentation was lost; the year loop is assumed
# to live inside the ``except`` branch -- confirm against the full script.
try:
    WS
except NameError:
    # ws = {th:list() for th in target_hgts}
    # wd = {th:list() for th in target_hgts}
    # wdsrf = list()
    WS = pd.DataFrame()
    WD = pd.DataFrame()

    for year in years:
        # surface stations (czd, bby) and the wind profiler for this year
        czd = parse_data.surface('czd', year=year)
        bby = parse_data.surface('bby', year=year)
        wpr = parse_data.windprof(year=year)
        hgt = wpr.hgt

        ''' reduce to common time period '''
        first_bby = bby.dframe.index[0]
        first_czd = czd.dframe.index[0]
        first_wpr = wpr.dframe.index[0]

        last_bby = bby.dframe.index[-1]
        last_czd = czd.dframe.index[-1]
        last_wpr = wpr.dframe.index[-1]

        # latest first record and earliest last record of the three sources
        first = max(first_bby,first_czd,first_wpr)
        last = min(last_bby,last_czd,last_wpr)

        # profiler wind speed restricted to the common period
        wspd = wpr.dframe.loc[first:last].wspd
def start(self, wdir_surf=None, wdir_wprof=None,
          rain_bby=None, rain_czd=None, nhours=None):
    """Run the hour-by-hour TTA analysis (old version).

    Prefer ``start_df``, which uses a pandas dataframe.

    Walks hourly through the period common to the BBY surface, CZD surface
    and wind-profiler records. Hours whose surface wind direction or
    first-gate profiler direction is missing are excluded. Every remaining
    hour is flagged as TTA when it meets the thresholds below AND belongs to
    a run of at least ``nhours`` consecutive qualifying (included) hours.

    Parameters
    ----------
    wdir_surf, wdir_wprof : number
        Upper limits for the surface and first-gate wind direction.
    rain_bby, rain_czd : number or None
        Optional minimum hourly precipitation at BBY / CZD; a falsy value
        disables the respective test.
    nhours : int
        Minimum run length, in hours, for a TTA event.

    Results are stored as attributes on ``self`` (hours, rainfall totals,
    the boolean TTA mask ``self.bool``, etc.).

    Raises
    ------
    TypeError
        If ``nhours`` is missing or not an integer value.
    """
    if nhours is None or int(nhours) != nhours:
        raise TypeError('nhours must be an integer number of hours')
    nhours = int(nhours)

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    # the latest of the beg
    time_beg = max(beg_bby, beg_czd, beg_wpr)

    # the earliest of the end
    time_end = min(end_bby, end_czd, end_wpr)

    # rainfall before all obs start
    rbby_before = np.nansum(bby.dframe.loc[:time_beg].precip)
    rczd_before = np.nansum(czd.dframe.loc[:time_beg].precip)

    # rainfall after all obs end
    rbby_after = np.nansum(bby.dframe.loc[time_end:].precip)
    rczd_after = np.nansum(czd.dframe.loc[time_end:].precip)

    # number of windprofiles before (after) all obs start (end)
    nwprof_before = len(wprof.dframe.loc[:time_beg].wdir)
    nwprof_after = len(wprof.dframe.loc[time_end:].wdir)

    onehr = timedelta(hours=1)
    time = time_beg

    # Plain lists avoid the quadratic cost of np.append inside the loop.
    tta_flags = []      # one entry per included hour once its run is decided
    rain_bby_obs = []
    rain_czd_obs = []
    pending = 0         # length of the current run of qualifying hours
    count_while = 0
    count_exclude = 0

    while time <= time_end:

        surf_wd = bby.dframe.loc[time].wdir
        wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
        pbby = bby.dframe.loc[time].precip
        pczd = czd.dframe.loc[time].precip

        # exclude data when there is nan in surf obs or windprof first gate
        if surf_wd is None or np.isnan(surf_wd) or np.isnan(wpr_wd0):
            count_exclude += 1
            time += onehr
            continue

        # these are obs included in the analysis; next we determine
        # whether they are tta or no-tta
        rain_bby_obs.append(pbby)
        rain_czd_obs.append(pczd)

        # check conditions (rain thresholds only when requested)
        tta_condition = (surf_wd <= wdir_surf) and (wpr_wd0 <= wdir_wprof)
        if rain_bby:
            tta_condition = tta_condition and (pbby >= rain_bby)
        if rain_czd:
            tta_condition = tta_condition and (pczd >= rain_czd)

        # construct boolean array indicating hourly TTA conditions
        # with a minimum of nhours
        if tta_condition:
            if pending >= nhours:
                # run already confirmed: this hour simply extends it
                tta_flags.append(True)
            else:
                pending += 1
                if pending == nhours:
                    # run just reached the minimum: confirm all its hours
                    tta_flags.extend([True] * nhours)
        else:
            if 0 < pending < nhours:
                # run too short: its hours plus the current one are no-tta
                tta_flags.extend([False] * (pending + 1))
            else:
                tta_flags.append(False)
            pending = 0

        count_while += 1
        time += onehr

    # A run still pending at the end of the record never reached nhours.
    # Flush it as no-tta so the mask stays aligned with the rainfall arrays
    # (the original left these hours out, breaking the masking below).
    if 0 < pending < nhours:
        tta_flags.extend([False] * pending)

    tta_bool = np.array(tta_flags, dtype=bool)
    rainfall_bby = np.array(rain_bby_obs, dtype=float)
    rainfall_czd = np.array(rain_czd_obs, dtype=float)

    tta_hours = tta_bool.sum()
    notta_hours = count_while - tta_hours

    self.tta_hours = tta_hours
    self.notta_hours = notta_hours
    self.time_beg = time_beg
    self.time_end = time_end
    self.count_while = count_while
    self.count_exclude = count_exclude
    self.total_rainfall_bby = np.nansum(rainfall_bby)
    self.total_rainfall_czd = np.nansum(rainfall_czd)
    self.bool = tta_bool
    self.tta_rainfall_czd = np.nansum(rainfall_czd[tta_bool])
    self.tta_rainfall_bby = np.nansum(rainfall_bby[tta_bool])
    self.notta_rainfall_czd = np.nansum(rainfall_czd[~tta_bool])
    self.notta_rainfall_bby = np.nansum(rainfall_bby[~tta_bool])
    self.rainfall_bby_before_analysis = rbby_before
    self.rainfall_bby_after_analysis = rbby_after
    self.rainfall_czd_before_analysis = rczd_before
    self.rainfall_czd_after_analysis = rczd_after
    self.nwprof_before = nwprof_before
    self.nwprof_after = nwprof_after
    self.wprof_hgt = wprof.hgt

    print('TTA analysis finished')
# years = [1998]
# list() keeps the concatenation valid on Python 3, where range() no longer
# returns a list (and is a no-op copy on Python 2).
years = [1998] + list(range(2001, 2013))

# Build the containers and read the data only when wdsrf is not yet defined
# in the namespace, so re-running in the same session skips this section.
# NOTE(review): the original indentation was lost; the year loop is assumed
# to live inside the ``except`` branch -- confirm against the full script.
try:
    wdsrf
except NameError:
    ws = {th: list() for th in target_hgts}
    wd = {th: list() for th in target_hgts}
    wdsrf = list()

    # which hours to keep: 'all', or only rainy hours at 'czd' / 'bby'
    select_rain = 'all'

    for year in years:

        wpr = parse_data.windprof(year=year)
        wspd = wpr.dframe.wspd
        wdir = wpr.dframe.wdir
        hgt = wpr.hgt

        czd = parse_data.surface('czd', year=year)
        bby = parse_data.surface('bby', year=year)

        if select_rain == 'all':
            select = None
        elif select_rain == 'czd':
            rain_czd = czd.dframe.precip > 0
            select = rain_czd[rain_czd].index
        elif select_rain == 'bby':
            rain_bby = bby.dframe.precip > 0
            select = rain_bby[rain_bby].index
        else:
            # otherwise `select` would silently keep a stale value
            raise ValueError(
                "select_rain must be 'all', 'czd' or 'bby', "
                "got {!r}".format(select_rain))
def plot(year=None, target=None, pngsuffix=False, normalized=True,
         contourf=True, pdfsuffix=False, wdsurf=None, wdwpro=None,
         rainbb=None, raincz=None, nhours=None):
    """Plot CFADs of BBY wind profiles for all, TTA and NO-TTA hours.

    For every year the TTA analysis (``tta_analysis(...).start_df``) supplies
    the included, TTA and NO-TTA dates; the profiler data are partitioned
    accordingly and stacked over all years. For each height gate a histogram
    of the non-NaN values is computed (following CFAD, Yuter et al. 1995)
    and the three resulting arrays are drawn side by side.

    Parameters
    ----------
    year : sequence
        Years to process (at least one).
    target : {'wdir', 'wspd'}
        Profiler variable to analyse.
    pngsuffix, pdfsuffix : str or False
        When truthy, save ``wprof_<target>_cfad<suffix>.png`` / ``.pdf`` and
        close the figure (png takes precedence); otherwise show the figure.
    normalized : bool
        Express each gate's histogram as percent of its valid samples.
    contourf : bool
        Filled contours (True) or pcolormesh (False).
    wdsurf, wdwpro, rainbb, raincz, nhours :
        Thresholds forwarded to ``start_df`` and echoed in the title.

    Raises
    ------
    ValueError
        If ``target`` is unsupported or ``year`` is empty.
    """
    name = {'wdir': 'Wind Direction',
            'wspd': 'Wind Speed'}

    if target == 'wdir':
        bins = np.arange(0, 370, 10)
        hist_xticks = np.arange(0, 420, 60)
        hist_xlim = [0, 360]
    elif target == 'wspd':
        bins = np.arange(0, 36, 1)
        hist_xticks = np.arange(0, 40, 5)
        hist_xlim = [0, 35]
    else:
        raise ValueError(
            "target must be 'wdir' or 'wspd', got {!r}".format(target))

    year = [] if year is None else list(year)
    if not year:
        raise ValueError('year must contain at least one year')

    # per-year arrays for (all included, tta, no-tta); stacked once below
    # instead of growing three arrays with np.hstack on every iteration
    stacks = ([], [], [])
    for y in year:
        print('Processing year {}'.format(y))

        # tta analysis
        tta = tta_analysis(y)
        tta.start_df(wdir_surf=wdsurf,
                     wdir_wprof=wdwpro,
                     rain_bby=rainbb,
                     rain_czd=raincz,
                     nhours=nhours)

        # retrieve dates
        include_dates = tta.include_dates
        tta_dates = tta.tta_dates
        notta_dates = tta.notta_dates

        # read wprof
        wprof_df = parse_data.windprof(y)
        wprof = wprof_df.dframe[target]

        # wprof partition
        wprof = wprof.loc[include_dates]      # all included
        wprof_tta = wprof.loc[tta_dates]      # only tta
        wprof_notta = wprof.loc[notta_dates]  # only notta

        for stack, part in zip(stacks, (wprof, wprof_tta, wprof_notta)):
            # presumably pandas2stack yields (gate, hour) -- TODO confirm
            stack.append(np.squeeze(pandas2stack(part)))

    wp, wp_tta, wp_notta = [np.hstack(stack) for stack in stacks]

    _, wp_hours = wp.shape
    _, tta_hours = wp_tta.shape
    _, notta_hours = wp_notta.shape

    # makes CFAD; gate count comes from the data, not a hard-coded 40
    n_gates = wp.shape[0]
    hist_array = np.empty((n_gates, len(bins) - 1, 3))
    for hgt in range(n_gates):
        rows = (wp[hgt, :], wp_tta[hgt, :], wp_notta[hgt, :])
        for n, r in enumerate(rows):
            # following CFAD Yuter et al (1995)
            freq, bins = np.histogram(r[~np.isnan(r)], bins=bins)
            if normalized:
                hist_array[hgt, :, n] = 100. * (freq / float(freq.sum()))
            else:
                hist_array[hgt, :, n] = freq

    fig, axs = plt.subplots(1, 3, sharey=True, figsize=(10, 8))
    ax1, ax2, ax3 = axs[0], axs[1], axs[2]

    hist_wp = np.squeeze(hist_array[:, :, 0])
    hist_wptta = np.squeeze(hist_array[:, :, 1])
    hist_wpnotta = np.squeeze(hist_array[:, :, 2])

    x = bins
    y = wprof_df.hgt  # heights of the last processed year

    if contourf:
        X, Y = np.meshgrid(x, y)
        # contourf needs one value per bin edge: pad with a NaN column
        nancol = np.full((n_gates, 1), np.nan)
        hist_wp = np.hstack((hist_wp, nancol))
        hist_wptta = np.hstack((hist_wptta, nancol))
        hist_wpnotta = np.hstack((hist_wpnotta, nancol))

        vmax = 20
        nlevels = 10
        delta = int(vmax / nlevels)
        v = np.arange(2, vmax + delta, delta)

        # pyplot.get_cmap is available in old and new matplotlib alike
        # (matplotlib.cm.get_cmap was removed in recent releases)
        cmap = plt.get_cmap('plasma')

        ax1.contourf(X, Y, hist_wp, v, cmap=cmap)
        p = ax2.contourf(X, Y, hist_wptta, v, cmap=cmap, extend='max')
        p.cmap.set_over(cmap(1.0))
        ax3.contourf(X, Y, hist_wpnotta, v, cmap=cmap)
        cbar = add_colorbar(ax3, p, size='4%')
    else:
        p = ax1.pcolormesh(x, y, hist_wp, cmap='viridis')
        ax2.pcolormesh(x, y, hist_wptta, cmap='viridis')
        ax3.pcolormesh(x, y, hist_wpnotta, cmap='viridis')
        # NOTE(review): the colorbar uses the ax1 mappable but ticks taken
        # from the NO-TTA array -- kept as in the original.
        amin = np.amin(hist_wpnotta)
        amax = np.amax(hist_wpnotta)
        cbar = add_colorbar(ax3, p, size='4%', ticks=[amin, amax])
        cbar.ax.set_yticklabels(['low', 'high'])

    # --- common setup of the three panels ---
    panel_titles = ('All profiles (n={})'.format(wp_hours),
                    'TTA (n={})'.format(tta_hours),
                    'NO-TTA (n={})'.format(notta_hours))
    for ax, txt in zip((ax1, ax2, ax3), panel_titles):
        ax.set_xticks(hist_xticks)
        ax.set_xlim(hist_xlim)
        ax.set_ylim([0, 4000])
        ax.text(0.5, 0.95, txt, fontsize=15,
                transform=ax.transAxes, va='bottom', ha='center')
    ax1.set_ylabel('Altitude [m] MSL')
    ax2.set_xlabel(name[target])

    title = 'Normalized frequencies of BBY wind profiles {} \n'
    title += 'TTA wdir_surf:{}, wdir_wp:{}, '
    title += 'rain_bby:{}, rain_czd:{}, nhours:{}'

    if len(year) == 1:
        yy = 'year {}'.format(year[0])
    else:
        yy = 'year {} to {}'.format(year[0], year[-1])
    plt.suptitle(title.format(yy, wdsurf, wdwpro, rainbb, raincz, nhours),
                 fontsize=15)

    plt.subplots_adjust(top=0.9, left=0.1, right=0.95, bottom=0.1,
                        wspace=0.1)

    if pngsuffix:
        out_name = 'wprof_{}_cfad{}.png'
        plt.savefig(out_name.format(target, pngsuffix))
        plt.close()
    elif pdfsuffix:
        out_name = 'wprof_{}_cfad{}.pdf'
        plt.savefig(out_name.format(target, pdfsuffix))
        plt.close()
    else:
        plt.show()