wdsrf except NameError: ws = {th: list() for th in target_hgts} wd = {th: list() for th in target_hgts} wdsrf = list() select_rain = 'all' for year in years: wpr = parse_data.windprof(year=year) wspd = wpr.dframe.wspd wdir = wpr.dframe.wdir hgt = wpr.hgt czd = parse_data.surface('czd', year=year) bby = parse_data.surface('bby', year=year) if select_rain == 'all': select = None elif select_rain == 'czd': rain_czd = czd.dframe.precip > 0 select = rain_czd[rain_czd].index elif select_rain == 'bby': rain_bby = bby.dframe.precip > 0 select = rain_bby[rain_bby].index elif select_rain == 'norain': norain_czd = czd.dframe.precip == 0 norain_bby = bby.dframe.precip == 0 norain = norain_czd & norain_bby select = norain[norain].index
except NameError: results = collections.OrderedDict() for tr in thres: results[tr] = {'U': U, 'V': V} for year in years: for p in params: tta = tta_analysis(year=year) tta.start_df_layer(**p) tta_dates = tta.tta_dates " parse surface and profile obs " bby = parse_data.surface('bby', year=year) wpr = parse_data.windprof(year=year) wpr_tta = wpr.dframe.loc[tta_dates] wdr_tta = wpr_tta['wdir'] wsp_tta = wpr_tta['wspd'] bby_tta = bby.dframe.loc[tta_dates] " append surface values to windprof " surf_wsp = iter(bby_tta.wspd.values.tolist()) surf_wdr = iter(bby_tta.wdir.values.tolist()) wsp_tta = wsp_tta.map(lambda x: [surf_wsp.next()] + x) wdr_tta = wdr_tta.map(lambda x: [surf_wdr.next()] + x)
# years = [1998]
# list() keeps the concatenation valid on Python 3, where range() is lazy
years = [1998] + list(range(2001, 2013))

try:
    WS
except NameError:
    # NOTE(review): the original indentation was lost; the yearly loop is
    # assumed to belong to this guard, so it only runs when WS has not been
    # defined by a previous (interactive) run -- confirm.
    WS = pd.DataFrame()
    WD = pd.DataFrame()

    for year in years:

        czd = parse_data.surface('czd', year=year)
        bby = parse_data.surface('bby', year=year)
        wpr = parse_data.windprof(year=year)
        hgt = wpr.hgt

        ''' reduce to common time period '''
        first_bby = bby.dframe.index[0]
        first_czd = czd.dframe.index[0]
        first_wpr = wpr.dframe.index[0]

        last_bby = bby.dframe.index[-1]
        last_czd = czd.dframe.index[-1]
        last_wpr = wpr.dframe.index[-1]

        # latest start and earliest end among the three datasets
        first = max(first_bby, first_czd, first_wpr)
        last = min(last_bby, last_czd, last_wpr)
import parse_data
from ctext import ctext

# Header rows of the begin/end comparison table.
# print(...) with a single argument behaves the same on Python 2 and 3.
txtHeader1 = '\nSurface\n{:^35} || {:^35}'
print(txtHeader1.format('Beg', 'End'))
txtHeader2 = '{:^16} | {:^16} || {:^16} {:^16}'
print(txtHeader2.format('BBY', 'CZD', 'CZD', 'BBY'))

t = ctext('{}')

# list() keeps the concatenation valid on Python 3, where range() is lazy
for y in [1998] + list(range(2001, 2013)):

    bby = parse_data.surface('bby', y)
    czd = parse_data.surface('czd', y)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()

    # second "Beg" field uses t.red() when CZD starts before BBY
    if beg_czd < beg_bby:
        tb = t.text + ' | ' + t.red()
    else:
        tb = t.text + ' | ' + t.text

    # first "End" field uses t.red() when CZD ends after BBY
    if end_czd > end_bby:
        te = t.red() + ' | ' + t.text
    else:
        te = t.text + ' | ' + t.text

    txtDate = tb + ' || ' + te
    fmt = '%Y-%m-%d %H:%M'
def preprocess(years=None, layer=None, verbose=True):
    '''
    Build multi-year wind-profile series (surface value prepended to each
    profile) and the layer-mean wind direction.

    Parameters
    ----------
    years : sequence of int
        Years passed to parse_data.windprof / parse_data.surface.
    layer : sequence of two numbers
        [bottom, top) height bounds used to select gates for the
        layer mean (compared against hgt with 0 prepended).
    verbose : bool
        Print the number of hours with missing precip for each year.

    Returns
    -------
    dict with keys WD, WS (all included hours), WD_rain, WS_rain
    (included hours with CZD precip > 0), wd_layer, precip_good
    (precip of included hours, all years) and precip (raw precip of the
    LAST processed year only, as in the original implementation).

    Raises
    ------
    TypeError if years or layer is None, ValueError if years is empty.
    '''
    import pandas as pd
    import parse_data

    if years is None or layer is None:
        raise TypeError('years and layer must both be given')
    if len(years) == 0:
        raise ValueError('years must contain at least one year')

    # per-year pieces are collected and concatenated once at the end;
    # Series.append/DataFrame.append no longer exist in pandas >= 2.0
    wd_parts, ws_parts = [], []
    wd_rain_parts, ws_rain_parts = [], []
    precip_parts = []

    for year in years:

        wpr = parse_data.windprof(year=year)
        bby = parse_data.surface('bby', year=year)
        czd = parse_data.surface('czd', year=year)
        hgt = wpr.hgt

        ' find common time period '
        first = max(bby.dframe.index[0],
                    czd.dframe.index[0],
                    wpr.dframe.index[0])
        last = min(bby.dframe.index[-1],
                   czd.dframe.index[-1],
                   wpr.dframe.index[-1])

        ' reduce time interval so all start and end at same time '
        wpr = wpr.dframe.loc[first:last]
        bby = bby.dframe.loc[first:last]
        czd = czd.dframe.loc[first:last]

        ' append surface values to windprof to make entire profile '
        # NOTE(review): relies on bby and wpr having the same number of
        # rows after trimming -- confirm against parse_data.
        surf_wsp = iter(bby.wspd.values.tolist())
        surf_wdr = iter(bby.wdir.values.tolist())
        # next(it) works on Python 2.6+ and 3 (it.next() is Python 2 only)
        wsp = wpr.wspd.map(lambda x: [next(surf_wsp)] + x)
        wdr = wpr.wdir.map(lambda x: [next(surf_wdr)] + x)
        hgt = np.append([0], hgt)

        ' check nans on precip '
        precip = pd.concat([bby.precip, czd.precip], axis=1)
        precip.columns = ['bby', 'czd']
        # vectorized row-wise test; apply(..., reduce=True) was removed
        # from pandas
        precip_nans = precip.isnull().any(axis=1)
        precip_nans.name = 'precip_nan'
        tx = 'year:{}, any_precip_nan:{:4d}'
        if verbose:
            print(tx.format(year, precip_nans.sum()))

        ' check entire profile nans ( same for ws and wd)'
        prof_nans = wsp.apply(lambda x: np.isnan(x).all())
        prof_nans.name = 'prof_nan'

        ' include only hours when surf precip is non-missing and the'
        ' profile has at least one non-missing value'
        nan_df = pd.concat([precip_nans, prof_nans], axis=1)
        include = ~nan_df.any(axis=1)

        precip_parts.append(precip[include])

        ' reduce and save to big Series '
        wdr = wdr[include]
        wsp = wsp[include]

        ' rainy hours at CZD, aligned to the retained hours '
        rain_czd = (czd.precip > 0).reindex(wdr.index, fill_value=False)

        wd_parts.append(wdr)
        ws_parts.append(wsp)
        wd_rain_parts.append(wdr[rain_czd])
        ws_rain_parts.append(wsp[rain_czd])

    WD = pd.concat(wd_parts)
    WS = pd.concat(ws_parts)
    WD_rain = pd.concat(wd_rain_parts)
    WS_rain = pd.concat(ws_rain_parts)
    precip_good = pd.concat(precip_parts)

    " compute components "
    # NOTE(review): sin/cos are module-level names not visible here; the
    # constants 180 and 90 below suggest they take degrees -- confirm.
    # The lambdas are kept so the call is always applied per profile.
    WD_sin = WD.apply(lambda x: sin(x))
    WD_cos = WD.apply(lambda x: cos(x))
    U_df = -1 * WS.multiply(WD_sin)
    V_df = -1 * WS.multiply(WD_cos)
    wind_flow_180 = -(U_df * sin(180) + V_df * cos(180))
    wind_flow_90 = U_df * sin(90) + V_df * cos(90)

    " layer-mean"
    # hgt is the height array of the last processed year (0 prepended)
    layer_idx = np.where((hgt >= layer[0]) & (hgt < layer[1]))[0]
    mean_V = wind_flow_180.apply(lambda x: np.nanmean(x[layer_idx]))
    mean_U = wind_flow_90.apply(lambda x: np.nanmean(x[layer_idx]))
    wd_layer = 270 - (np.arctan2(mean_V, mean_U) * 180 / np.pi)
    wd_layer[wd_layer > 360] -= 360
    wd_layer.name = '{:2.0f}-{:2.0f}m'.format(hgt[layer_idx[0]],
                                              hgt[layer_idx[-1]])

    return dict(WD=WD, WS=WS, WD_rain=WD_rain, WS_rain=WS_rain,
                wd_layer=wd_layer, precip=precip,
                precip_good=precip_good)
def start_df(self, wdir_surf=None, wdir_wprof=None,
             rain_bby=None, rain_czd=None, nhours=None):
    '''
    Hourly TTA analysis for self.year using a pandas dataframe.

    An hour meets the TTA condition when the BBY surface wind direction
    is <= wdir_surf, the first-gate wind-profiler direction is
    <= wdir_wprof and, when given, BBY/CZD precip is >= rain_bby /
    rain_czd. Hours are flagged "consecutive" when they belong to a run
    of at least nhours hours meeting the condition.

    Parameters
    ----------
    wdir_surf, wdir_wprof : number
        Direction thresholds (both are always compared).
    rain_bby, rain_czd : number or None
        Optional precip thresholds; falsy values are not applied.
        When rain_czd >= 0.25, hours with zero CZD precip are excluded.
    nhours : int
        Minimum run length, required.

    Results are stored as attributes on self (time_beg, time_end,
    rainfall totals, hour counts, date indexes and the dataframe df).

    Raises
    ------
    TypeError if nhours is None.
    '''
    import pandas as pd

    if nhours is None:
        raise TypeError('nhours must be an integer number of hours')

    def consecutive_runs(flags, min_len):
        ''' mark hours belonging to a run of >= min_len True flags '''
        marked = np.zeros(len(flags), dtype=bool)
        run_start = None
        # the False sentinel closes a run that reaches the last hour,
        # so the result always has one entry per input hour
        for i, flag in enumerate(list(flags) + [False]):
            if flag and run_start is None:
                run_start = i
            elif not flag and run_start is not None:
                if i - run_start >= min_len:
                    marked[run_start:i] = True
                run_start = None
        return marked

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    ''' trim the head and tail of dataset depending
        on the latest time of the beginning and
        earliest of the ending '''
    time_beg = max(beg_bby, beg_czd, beg_wpr)
    time_end = min(end_bby, end_czd, end_wpr)

    ''' initializations '''
    onehr = timedelta(hours=1)
    rng = pd.date_range(start=time_beg, end=time_end, freq='1H')
    cols = ('wssrf', 'wswpr', 'wdsrf', 'wdwpr',
            'rbby', 'rczd', 'tta', 'consecutive')
    df = pd.DataFrame(index=rng, columns=cols)
    hourly_tta = []

    ''' loop evaluates each time '''
    time = time_beg
    while time <= time_end:

        bby_row = bby.dframe.loc[time]
        wpr_row = wprof.dframe.loc[time]

        surf_wd = bby_row.wdir
        wpr_wd0 = wpr_row.wdir[0]  # first gate
        pbby = bby_row.precip
        pczd = czd.dframe.loc[time].precip

        if surf_wd is None:
            surf_wd = np.nan

        # single-step .loc[row, col] writes into df itself; the chained
        # form df.loc[time].col = v may write into a temporary copy
        df.loc[time, 'wdsrf'] = surf_wd
        df.loc[time, 'wdwpr'] = wpr_wd0
        df.loc[time, 'rbby'] = pbby
        df.loc[time, 'rczd'] = pczd
        df.loc[time, 'wssrf'] = bby_row.wspd
        df.loc[time, 'wswpr'] = wpr_row.wspd[0]

        ''' check conditions '''
        conditions = [surf_wd <= wdir_surf, wpr_wd0 <= wdir_wprof]
        if rain_bby:
            conditions.append(pbby >= rain_bby)
        if rain_czd:
            conditions.append(pczd >= rain_czd)
        tta_condition = all(conditions)

        df.loc[time, 'tta'] = tta_condition
        hourly_tta.append(tta_condition)

        time += onehr

    ''' boolean array indicating hourly TTA conditions
        with minimum of nhours '''
    df['consecutive'] = consecutive_runs(hourly_tta, nhours)

    ar_wdsrf = df.wdsrf.values.astype(float)
    ar_wdwpr = df.wdwpr.values.astype(float)
    ar_rbby = df.rbby.values.astype(float)
    ar_rczd = df.rczd.values.astype(float)

    # hours with any missing observation are always excluded
    exclude = (np.isnan(ar_wdsrf) | np.isnan(ar_wdwpr) |
               np.isnan(ar_rbby) | np.isnan(ar_rczd))
    if rain_czd is not None and rain_czd >= 0.25:
        ''' also exclude dates when there is no precip at CZD '''
        exclude = exclude | (ar_rczd == 0)

    tot_rbby = np.round(df.rbby.sum(), 0).astype(int)
    tot_rczd = np.round(df.rczd.sum(), 0).astype(int)
    exc_rbby = np.round(df[exclude].rbby.sum(), 0).astype(int)
    exc_rczd = np.round(df[exclude].rczd.sum(), 0).astype(int)
    inc_rbby = tot_rbby - exc_rbby
    inc_rczd = tot_rczd - exc_rczd
    tot_hrs = np.round(df.index.size, 0).astype(int)
    exc_hours = np.round(exclude.sum(), 0).astype(int)
    inc_hours = tot_hrs - exc_hours
    tta_rbby = np.round(df[df.consecutive].rbby.sum(), 0).astype(int)
    tta_rczd = np.round(df[df.consecutive].rczd.sum(), 0).astype(int)
    notta_rbby = inc_rbby - tta_rbby
    notta_rczd = inc_rczd - tta_rczd

    exclude_dates = df[exclude].index
    include_dates = df[~exclude].index
    tta_dates = df[~exclude & df.consecutive].index
    notta_dates = df[~exclude & ~df.consecutive].index
    tta_hours = tta_dates.size
    notta_hours = notta_dates.size

    self.time_beg = time_beg
    self.time_end = time_end
    self.count_hrs_include = inc_hours
    self.count_hrs_exclude = exc_hours
    self.tot_rainfall_bby = tot_rbby
    self.tot_rainfall_czd = tot_rczd
    self.inc_rainfall_bby = inc_rbby
    self.inc_rainfall_czd = inc_rczd
    self.exc_rainfall_bby = exc_rbby
    self.exc_rainfall_czd = exc_rczd
    self.tta_rainfall_bby = tta_rbby
    self.tta_rainfall_czd = tta_rczd
    self.notta_rainfall_bby = notta_rbby
    self.notta_rainfall_czd = notta_rczd
    self.tta_hours = tta_hours
    self.notta_hours = notta_hours
    self.wprof_hgt = wprof.hgt
    self.exclude_dates = exclude_dates
    self.include_dates = include_dates
    self.tta_dates = tta_dates
    self.notta_dates = notta_dates
    self.df = df
def start(self, wdir_surf=None, wdir_wprof=None,
          rain_bby=None, rain_czd=None, nhours=None):
    '''
    Old version of the hourly TTA analysis; prefer start_df, which uses
    a pandas dataframe.

    Hours where the surface wind direction or the first-gate profiler
    direction is missing are skipped (counted in count_exclude) and do
    not interrupt a run. The remaining hours meet the TTA condition when
    surface direction <= wdir_surf, profiler direction <= wdir_wprof
    and, when given, BBY/CZD precip >= rain_bby / rain_czd. Hours are
    TTA when they belong to a run of at least nhours such hours.

    Results are stored as attributes on self.

    Raises
    ------
    TypeError if nhours is None.
    '''
    if nhours is None:
        raise TypeError('nhours must be an integer number of hours')

    def consecutive_runs(flags, min_len):
        ''' mark hours belonging to a run of >= min_len True flags '''
        marked = np.zeros(len(flags), dtype=bool)
        run_start = None
        # the False sentinel closes a run that reaches the last hour,
        # so the result always has one entry per included hour
        for i, flag in enumerate(list(flags) + [False]):
            if flag and run_start is None:
                run_start = i
            elif not flag and run_start is not None:
                if i - run_start >= min_len:
                    marked[run_start:i] = True
                run_start = None
        return marked

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    ''' the latest of the beg '''
    time_beg = max(beg_bby, beg_czd, beg_wpr)

    ''' the earliest of the end '''
    time_end = min(end_bby, end_czd, end_wpr)

    ''' rainfall before all obs start '''
    rbby_before = np.nansum(bby.dframe.loc[:time_beg].precip)
    rczd_before = np.nansum(czd.dframe.loc[:time_beg].precip)

    ''' rainfall after all obs end '''
    rbby_after = np.nansum(bby.dframe.loc[time_end:].precip)
    rczd_after = np.nansum(czd.dframe.loc[time_end:].precip)

    ''' number of windprofiles before (after)
        all obs start (end) '''
    nwprof_before = len(wprof.dframe.loc[:time_beg].wdir)
    nwprof_after = len(wprof.dframe.loc[time_end:].wdir)

    onehr = timedelta(hours=1)
    rainfall_bby = []
    rainfall_czd = []
    hourly_tta = []
    count_exclude = 0

    time = time_beg
    while time <= time_end:

        surf_wd = bby.dframe.loc[time].wdir
        wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
        pbby = bby.dframe.loc[time].precip
        pczd = czd.dframe.loc[time].precip

        ''' exclude data when there is nan in surf obs
            or windprof first gate '''
        if surf_wd is None or np.isnan(surf_wd) or np.isnan(wpr_wd0):
            count_exclude += 1
            time += onehr
            continue

        ''' these are obs included in the analysis, then we
            determine if they are tta or no-tta '''
        rainfall_bby.append(pbby)
        rainfall_czd.append(pczd)

        ''' check conditions '''
        conditions = [surf_wd <= wdir_surf, wpr_wd0 <= wdir_wprof]
        if rain_bby:
            conditions.append(pbby >= rain_bby)
        if rain_czd:
            conditions.append(pczd >= rain_czd)
        hourly_tta.append(all(conditions))

        time += onehr

    rainfall_bby = np.array(rainfall_bby, dtype=float)
    rainfall_czd = np.array(rainfall_czd, dtype=float)

    ''' boolean array indicating hourly TTA conditions
        with minimum of nhours; same length as the rainfall arrays '''
    count_while = len(hourly_tta)
    tta_bool = consecutive_runs(hourly_tta, nhours)
    tta_hours = tta_bool.sum()
    notta_hours = count_while - tta_hours

    self.tta_hours = tta_hours
    self.notta_hours = notta_hours
    self.time_beg = time_beg
    self.time_end = time_end
    self.count_while = count_while
    self.count_exclude = count_exclude
    self.total_rainfall_bby = np.nansum(rainfall_bby)
    self.total_rainfall_czd = np.nansum(rainfall_czd)
    self.bool = tta_bool
    self.tta_rainfall_czd = np.nansum(rainfall_czd[tta_bool])
    self.tta_rainfall_bby = np.nansum(rainfall_bby[tta_bool])
    self.notta_rainfall_czd = np.nansum(rainfall_czd[~tta_bool])
    self.notta_rainfall_bby = np.nansum(rainfall_bby[~tta_bool])
    self.rainfall_bby_before_analysis = rbby_before
    self.rainfall_bby_after_analysis = rbby_after
    self.rainfall_czd_before_analysis = rczd_before
    self.rainfall_czd_after_analysis = rczd_after
    self.nwprof_before = nwprof_before
    self.nwprof_after = nwprof_after
    self.wprof_hgt = wprof.hgt

    print('TTA analysis finished')
def start(self, wdir_surf=None, wdir_wprof=None,
          rain_bby=None, rain_czd=None, nhours=None):
    '''
    Old version of the hourly TTA analysis; prefer start_df, which uses
    a pandas dataframe for analysis.

    Hours where the surface wind direction or the first-gate profiler
    direction is missing are skipped (counted in count_exclude) and do
    not interrupt a run. The remaining hours meet the TTA condition when
    surface direction <= wdir_surf, profiler direction <= wdir_wprof
    and, when given, BBY/CZD precip >= rain_bby / rain_czd. Hours are
    TTA when they belong to a run of at least nhours such hours.

    Results are stored as attributes on self.

    Raises
    ------
    TypeError if nhours is None.
    '''
    if nhours is None:
        raise TypeError('nhours must be an integer number of hours')

    def consecutive_runs(flags, min_len):
        ''' mark hours belonging to a run of >= min_len True flags '''
        marked = np.zeros(len(flags), dtype=bool)
        run_start = None
        # the False sentinel closes a run that reaches the last hour,
        # so the result always has one entry per included hour
        for i, flag in enumerate(list(flags) + [False]):
            if flag and run_start is None:
                run_start = i
            elif not flag and run_start is not None:
                if i - run_start >= min_len:
                    marked[run_start:i] = True
                run_start = None
        return marked

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    ''' the latest of the beg '''
    time_beg = max(beg_bby, beg_czd, beg_wpr)

    ''' the earliest of the end '''
    time_end = min(end_bby, end_czd, end_wpr)

    ''' rainfall before all obs start '''
    rbby_before = np.nansum(bby.dframe.loc[:time_beg].precip)
    rczd_before = np.nansum(czd.dframe.loc[:time_beg].precip)

    ''' rainfall after all obs end '''
    rbby_after = np.nansum(bby.dframe.loc[time_end:].precip)
    rczd_after = np.nansum(czd.dframe.loc[time_end:].precip)

    ''' number of windprofiles before (after)
        all obs start (end) '''
    nwprof_before = len(wprof.dframe.loc[:time_beg].wdir)
    nwprof_after = len(wprof.dframe.loc[time_end:].wdir)

    onehr = timedelta(hours=1)
    rainfall_bby = []
    rainfall_czd = []
    hourly_tta = []
    count_exclude = 0

    time = time_beg
    while time <= time_end:

        surf_wd = bby.dframe.loc[time].wdir
        wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
        pbby = bby.dframe.loc[time].precip
        pczd = czd.dframe.loc[time].precip

        ''' exclude data when there is nan in surf obs
            or windprof first gate '''
        if surf_wd is None or np.isnan(surf_wd) or np.isnan(wpr_wd0):
            count_exclude += 1
            time += onehr
            continue

        ''' these are obs included in the analysis, then we
            determine if they are tta or no-tta '''
        rainfall_bby.append(pbby)
        rainfall_czd.append(pczd)

        ''' check conditions '''
        conditions = [surf_wd <= wdir_surf, wpr_wd0 <= wdir_wprof]
        if rain_bby:
            conditions.append(pbby >= rain_bby)
        if rain_czd:
            conditions.append(pczd >= rain_czd)
        hourly_tta.append(all(conditions))

        time += onehr

    rainfall_bby = np.array(rainfall_bby, dtype=float)
    rainfall_czd = np.array(rainfall_czd, dtype=float)

    ''' boolean array indicating hourly TTA conditions
        with minimum of nhours; same length as the rainfall arrays '''
    count_while = len(hourly_tta)
    tta_bool = consecutive_runs(hourly_tta, nhours)
    tta_hours = tta_bool.sum()
    notta_hours = count_while - tta_hours

    self.tta_hours = tta_hours
    self.notta_hours = notta_hours
    self.time_beg = time_beg
    self.time_end = time_end
    self.count_while = count_while
    self.count_exclude = count_exclude
    self.total_rainfall_bby = np.nansum(rainfall_bby)
    self.total_rainfall_czd = np.nansum(rainfall_czd)
    self.bool = tta_bool
    self.tta_rainfall_czd = np.nansum(rainfall_czd[tta_bool])
    self.tta_rainfall_bby = np.nansum(rainfall_bby[tta_bool])
    self.notta_rainfall_czd = np.nansum(rainfall_czd[~tta_bool])
    self.notta_rainfall_bby = np.nansum(rainfall_bby[~tta_bool])
    self.rainfall_bby_before_analysis = rbby_before
    self.rainfall_bby_after_analysis = rbby_after
    self.rainfall_czd_before_analysis = rczd_before
    self.rainfall_czd_after_analysis = rczd_after
    self.nwprof_before = nwprof_before
    self.nwprof_after = nwprof_after
    self.wprof_hgt = wprof.hgt

    print('TTA analysis finished')
def start_df_layer(self,
                   wdir_thres=None,
                   wdir_layer=(None, None),  # [meters]
                   rain_bby=None,
                   rain_czd=None,
                   nhours=None):
    '''
    Hourly TTA analysis for self.year using a pandas dataframe; similar
    to start_df but the wind direction is the vector mean over a layer
    instead of a single level.

    Parameters
    ----------
    wdir_thres : number or str
        A number is applied as wd_mean < wdir_thres; any other value is
        handed to parse_operator(wd_mean, wdir_thres).
    wdir_layer : pair of numbers
        [bottom, top) bounds selecting profiler gates by wprof.hgt;
        a bottom of 0 also includes the BBY surface observation.
    rain_bby, rain_czd : number or None
        Optional precip thresholds (applied as precip >= threshold).
        When rain_czd >= 0.25, hours with zero CZD precip are excluded.
    nhours : int
        Minimum number of consecutive hours meeting the condition,
        required.

    Results are stored as attributes on self (time_beg, time_end,
    rainfall totals, hour counts, date indexes and the dataframe df).

    Raises
    ------
    TypeError if nhours is None; ValueError if no criterion is given.
    '''
    import numbers
    import pandas as pd

    if nhours is None:
        raise TypeError('nhours must be an integer number of hours')
    if not (wdir_thres or rain_bby or rain_czd):
        raise ValueError('at least one of wdir_thres, rain_bby or '
                         'rain_czd must be given')

    def consecutive_runs(flags, min_len):
        ''' mark hours belonging to a run of >= min_len True flags '''
        marked = np.zeros(len(flags), dtype=bool)
        run_start = None
        # the False sentinel closes a run that reaches the last hour,
        # so the result always has one entry per input hour
        for i, flag in enumerate(list(flags) + [False]):
            if flag and run_start is None:
                run_start = i
            elif not flag and run_start is not None:
                if i - run_start >= min_len:
                    marked[run_start:i] = True
                run_start = None
        return marked

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    ''' trim the head and tail of dataset depending
        on the latest time of the beginning and
        earliest of the ending '''
    time_beg = max(beg_bby, beg_czd, beg_wpr)
    time_end = min(end_bby, end_czd, end_wpr)

    ''' initializations '''
    onehr = timedelta(hours=1)
    rng = pd.date_range(start=time_beg, end=time_end, freq='1H')

    # profiler gates inside the requested layer
    idx = np.where((wprof.hgt >= wdir_layer[0]) &
                   (wprof.hgt < wdir_layer[1]))[0]
    wphgt = wprof.hgt[idx]
    use_surface = (wdir_layer[0] == 0)

    ''' columns included in the dataframe '''
    wdircol = 'wd_{}-{:2.0f}m'.format(wdir_layer[0], wphgt[-1])
    cols = [wdircol, 'rbby', 'rczd', 'tta', 'consecutive']

    ''' create dataframe '''
    df = pd.DataFrame(index=rng, columns=cols)
    hourly_tta = []

    ''' loop evaluates each time '''
    time = time_beg
    while time <= time_end:

        bby_row = bby.dframe.loc[time]
        wpr_row = wprof.dframe.loc[time]

        if use_surface:
            surf_wd = np.array(bby_row.wdir)
            surf_ws = np.array(bby_row.wspd)
        else:
            surf_wd = np.array([])
            surf_ws = np.array([])

        wd = np.append(surf_wd, np.array(wpr_row.wdir)[idx])
        ws = np.append(surf_ws, np.array(wpr_row.wspd)[idx])

        # vector-mean direction of the layer
        u = -ws * np.sin(np.radians(wd))
        v = -ws * np.cos(np.radians(wd))
        u_mean = u.mean()
        v_mean = v.mean()
        wd_mean = 270 - np.arctan2(v_mean, u_mean) * 180. / np.pi
        if wd_mean > 360:
            wd_mean -= 360

        pbby = bby_row.precip
        pczd = czd.dframe.loc[time].precip

        # single-step .loc[row, col] writes into df itself; the chained
        # form df.loc[time].col = v may write into a temporary copy
        df.loc[time, wdircol] = wd_mean
        df.loc[time, 'rbby'] = pbby
        df.loc[time, 'rczd'] = pczd

        ''' check conditions; every supplied criterion must hold '''
        conditions = []
        if wdir_thres:
            if isinstance(wdir_thres, numbers.Number):
                # covers floats too, not only ints
                conditions.append(wd_mean < wdir_thres)
            else:
                conditions.append(parse_operator(wd_mean, wdir_thres))
        if rain_czd:
            conditions.append(pczd >= rain_czd)
        if rain_bby:
            conditions.append(pbby >= rain_bby)
        tta_condition = all(conditions)

        df.loc[time, 'tta'] = tta_condition
        hourly_tta.append(tta_condition)

        time += onehr

    ''' boolean array indicating hourly TTA conditions
        with minimum of nhours '''
    df['consecutive'] = consecutive_runs(hourly_tta, nhours)

    ar_wdir = df[wdircol].values.astype(float)
    ar_rbby = df.rbby.values.astype(float)
    ar_rczd = df.rczd.values.astype(float)

    # hours with any missing value are always excluded
    exclude = np.isnan(ar_wdir) | np.isnan(ar_rbby) | np.isnan(ar_rczd)
    if rain_czd is not None and rain_czd >= 0.25:
        ''' also exclude dates when there is no precip at CZD '''
        exclude = exclude | (ar_rczd == 0)

    tot_rbby = np.round(df.rbby.sum(), 3)
    tot_rczd = np.round(df.rczd.sum(), 3)
    exc_rbby = np.round(df[exclude].rbby.sum(), 3)
    exc_rczd = np.round(df[exclude].rczd.sum(), 3)
    inc_rbby = tot_rbby - exc_rbby
    inc_rczd = tot_rczd - exc_rczd
    tot_hrs = np.round(df.index.size, 0).astype(int)
    exc_hours = np.round(exclude.sum(), 0).astype(int)
    inc_hours = tot_hrs - exc_hours
    tta_rbby = np.round(df[df.consecutive].rbby.sum(), 3)
    tta_rczd = np.round(df[df.consecutive].rczd.sum(), 3)
    notta_rbby = inc_rbby - tta_rbby
    notta_rczd = inc_rczd - tta_rczd

    exclude_dates = df[exclude].index
    include_dates = df[~exclude].index
    tta_dates = df[~exclude & df.consecutive].index
    notta_dates = df[~exclude & ~df.consecutive].index
    tta_hours = tta_dates.size
    notta_hours = notta_dates.size

    self.time_beg = time_beg
    self.time_end = time_end
    self.count_hrs_include = inc_hours
    self.count_hrs_exclude = exc_hours
    self.tot_rainfall_bby = tot_rbby
    self.tot_rainfall_czd = tot_rczd
    self.inc_rainfall_bby = inc_rbby
    self.inc_rainfall_czd = inc_rczd
    self.exc_rainfall_bby = exc_rbby
    self.exc_rainfall_czd = exc_rczd
    self.tta_rainfall_bby = tta_rbby
    self.tta_rainfall_czd = tta_rczd
    self.notta_rainfall_bby = notta_rbby
    self.notta_rainfall_czd = notta_rczd
    self.tta_hours = tta_hours
    self.notta_hours = notta_hours
    self.wprof_hgt = wprof.hgt
    self.exclude_dates = exclude_dates
    self.include_dates = include_dates
    self.tta_dates = tta_dates
    self.notta_dates = notta_dates
    self.df = df
def start_df(self,
             wdir_surf=None,
             wdir_wprof=None,
             wprof_gate=0,
             rain_bby=None,
             rain_czd=None,
             nhours=None):
    '''
    Hourly TTA analysis for self.year using a pandas dataframe; it
    should be more accurate and simpler than the start method.

    Every supplied criterion must hold for an hour to meet the TTA
    condition; hours are flagged "consecutive" when they belong to a
    run of at least nhours such hours.

    Parameters
    ----------
    wdir_surf, wdir_wprof : number or str
        Thresholds for the BBY surface direction and for the profiler
        direction at gate wprof_gate. A number is applied as
        value <= threshold; any other value is handed to
        parse_operator(value, threshold).
    wprof_gate : int
        Index of the profiler gate to use.
    rain_bby, rain_czd : number or None
        Optional precip thresholds (applied as precip >= threshold).
        When rain_czd >= 0.25, hours with zero CZD precip are excluded.
    nhours : int
        Minimum run length, required.

    Results are stored as attributes on self (time_beg, time_end,
    rainfall totals, hour counts, date indexes and the dataframe df).

    Raises
    ------
    TypeError if nhours is None; ValueError if no criterion is given.
    '''
    import numbers
    import pandas as pd

    if nhours is None:
        raise TypeError('nhours must be an integer number of hours')
    if not (wdir_surf or wdir_wprof or rain_bby or rain_czd):
        raise ValueError('at least one of wdir_surf, wdir_wprof, '
                         'rain_bby or rain_czd must be given')

    def wdir_check(value, threshold):
        ''' numeric thresholds (int or float) mean value <= threshold '''
        if isinstance(threshold, numbers.Number):
            return value <= threshold
        return parse_operator(value, threshold)

    def consecutive_runs(flags, min_len):
        ''' mark hours belonging to a run of >= min_len True flags '''
        marked = np.zeros(len(flags), dtype=bool)
        run_start = None
        # the False sentinel closes a run that reaches the last hour,
        # so the result always has one entry per input hour
        for i, flag in enumerate(list(flags) + [False]):
            if flag and run_start is None:
                run_start = i
            elif not flag and run_start is not None:
                if i - run_start >= min_len:
                    marked[run_start:i] = True
                run_start = None
        return marked

    bby = parse_data.surface('bby', self.year)
    czd = parse_data.surface('czd', self.year)
    wprof = parse_data.windprof(self.year)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()
    beg_wpr, end_wpr = wprof.check_beg_end()

    ''' trim the head and tail of dataset depending
        on the latest time of the beginning and
        earliest of the ending '''
    time_beg = max(beg_bby, beg_czd, beg_wpr)
    time_end = min(end_bby, end_czd, end_wpr)

    ''' initializations '''
    onehr = timedelta(hours=1)
    rng = pd.date_range(start=time_beg, end=time_end, freq='1H')

    ''' columns included in the dataframe '''
    wprofcol = 'wdwpr{}'.format(wprof_gate)
    cols = ['wdsrf', wprofcol, 'rbby', 'rczd', 'tta', 'consecutive']

    ''' create dataframe '''
    df = pd.DataFrame(index=rng, columns=cols)
    hourly_tta = []

    ''' loop evaluates each time '''
    time = time_beg
    while time <= time_end:

        bby_row = bby.dframe.loc[time]

        surf_wd = bby_row.wdir
        wpr_wd0 = wprof.dframe.loc[time].wdir[wprof_gate]
        pbby = bby_row.precip
        pczd = czd.dframe.loc[time].precip

        # single-step .loc[row, col] writes into df itself; the chained
        # form df.loc[time].col = v may write into a temporary copy
        df.loc[time, 'wdsrf'] = surf_wd
        df.loc[time, wprofcol] = wpr_wd0
        df.loc[time, 'rbby'] = pbby
        df.loc[time, 'rczd'] = pczd

        ''' check conditions; every supplied criterion must hold '''
        conditions = []
        if wdir_surf:
            conditions.append(wdir_check(surf_wd, wdir_surf))
        if wdir_wprof:
            conditions.append(wdir_check(wpr_wd0, wdir_wprof))
        if rain_czd:
            conditions.append(pczd >= rain_czd)
        if rain_bby:
            conditions.append(pbby >= rain_bby)
        tta_condition = all(conditions)

        df.loc[time, 'tta'] = tta_condition
        hourly_tta.append(tta_condition)

        time += onehr

    ''' boolean array indicating hourly TTA conditions
        with minimum of nhours '''
    df['consecutive'] = consecutive_runs(hourly_tta, nhours)

    ar_wdsrf = df.wdsrf.values.astype(float)
    ar_wdwpr = df[wprofcol].values.astype(float)
    ar_rbby = df.rbby.values.astype(float)
    ar_rczd = df.rczd.values.astype(float)

    # hours with any missing observation are always excluded
    exclude = (np.isnan(ar_wdsrf) | np.isnan(ar_wdwpr) |
               np.isnan(ar_rbby) | np.isnan(ar_rczd))
    if rain_czd is not None and rain_czd >= 0.25:
        ''' also exclude dates when there is no precip at CZD '''
        exclude = exclude | (ar_rczd == 0)

    tot_rbby = np.round(df.rbby.sum(), 3)
    tot_rczd = np.round(df.rczd.sum(), 3)
    exc_rbby = np.round(df[exclude].rbby.sum(), 3)
    exc_rczd = np.round(df[exclude].rczd.sum(), 3)
    inc_rbby = tot_rbby - exc_rbby
    inc_rczd = tot_rczd - exc_rczd
    tot_hrs = np.round(df.index.size, 0).astype(int)
    exc_hours = np.round(exclude.sum(), 0).astype(int)
    inc_hours = tot_hrs - exc_hours
    tta_rbby = np.round(df[df.consecutive].rbby.sum(), 3)
    tta_rczd = np.round(df[df.consecutive].rczd.sum(), 3)
    notta_rbby = inc_rbby - tta_rbby
    notta_rczd = inc_rczd - tta_rczd

    exclude_dates = df[exclude].index
    include_dates = df[~exclude].index
    tta_dates = df[~exclude & df.consecutive].index
    notta_dates = df[~exclude & ~df.consecutive].index
    tta_hours = tta_dates.size
    notta_hours = notta_dates.size

    self.time_beg = time_beg
    self.time_end = time_end
    self.count_hrs_include = inc_hours
    self.count_hrs_exclude = exc_hours
    self.tot_rainfall_bby = tot_rbby
    self.tot_rainfall_czd = tot_rczd
    self.inc_rainfall_bby = inc_rbby
    self.inc_rainfall_czd = inc_rczd
    self.exc_rainfall_bby = exc_rbby
    self.exc_rainfall_czd = exc_rczd
    self.tta_rainfall_bby = tta_rbby
    self.tta_rainfall_czd = tta_rczd
    self.notta_rainfall_bby = notta_rbby
    self.notta_rainfall_czd = notta_rczd
    self.tta_hours = tta_hours
    self.notta_hours = notta_hours
    self.wprof_hgt = wprof.hgt
    self.exclude_dates = exclude_dates
    self.include_dates = include_dates
    self.tta_dates = tta_dates
    self.notta_dates = notta_dates
    self.df = df
def preprocess(years=None, layer=None, verbose=True):
    '''
    Build multi-year wind-profile series (surface value prepended to each
    profile) and the layer-mean wind direction.

    Parameters
    ----------
    years : sequence of int
        Years passed to parse_data.windprof / parse_data.surface.
    layer : sequence of two numbers
        [bottom, top) height bounds used to select gates for the
        layer mean (compared against hgt with 0 prepended).
    verbose : bool
        Print the number of hours with missing precip for each year.

    Returns
    -------
    dict with keys WD, WS (all included hours), WD_rain, WS_rain
    (included hours with CZD precip > 0), wd_layer, precip_good
    (precip of included hours, all years) and precip (raw precip of the
    LAST processed year only, as in the original implementation).

    Raises
    ------
    TypeError if years or layer is None, ValueError if years is empty.
    '''
    import pandas as pd
    import parse_data

    if years is None or layer is None:
        raise TypeError('years and layer must both be given')
    if len(years) == 0:
        raise ValueError('years must contain at least one year')

    # per-year pieces are collected and concatenated once at the end;
    # Series.append/DataFrame.append no longer exist in pandas >= 2.0
    wd_parts, ws_parts = [], []
    wd_rain_parts, ws_rain_parts = [], []
    precip_parts = []

    for year in years:

        wpr = parse_data.windprof(year=year)
        bby = parse_data.surface('bby', year=year)
        czd = parse_data.surface('czd', year=year)
        hgt = wpr.hgt

        ' find common time period '
        first = max(bby.dframe.index[0],
                    czd.dframe.index[0],
                    wpr.dframe.index[0])
        last = min(bby.dframe.index[-1],
                   czd.dframe.index[-1],
                   wpr.dframe.index[-1])

        ' reduce time interval so all start and end at same time '
        wpr = wpr.dframe.loc[first:last]
        bby = bby.dframe.loc[first:last]
        czd = czd.dframe.loc[first:last]

        ' append surface values to windprof to make entire profile '
        # NOTE(review): relies on bby and wpr having the same number of
        # rows after trimming -- confirm against parse_data.
        surf_wsp = iter(bby.wspd.values.tolist())
        surf_wdr = iter(bby.wdir.values.tolist())
        # next(it) works on Python 2.6+ and 3 (it.next() is Python 2 only)
        wsp = wpr.wspd.map(lambda x: [next(surf_wsp)] + x)
        wdr = wpr.wdir.map(lambda x: [next(surf_wdr)] + x)
        hgt = np.append([0], hgt)

        ' check nans on precip '
        precip = pd.concat([bby.precip, czd.precip], axis=1)
        precip.columns = ['bby', 'czd']
        # vectorized row-wise test; apply(..., reduce=True) was removed
        # from pandas
        precip_nans = precip.isnull().any(axis=1)
        precip_nans.name = 'precip_nan'
        tx = 'year:{}, any_precip_nan:{:4d}'
        if verbose:
            print(tx.format(year, precip_nans.sum()))

        ' check entire profile nans ( same for ws and wd)'
        prof_nans = wsp.apply(lambda x: np.isnan(x).all())
        prof_nans.name = 'prof_nan'

        ' include only hours when surf precip is non-missing and the'
        ' profile has at least one non-missing value'
        nan_df = pd.concat([precip_nans, prof_nans], axis=1)
        include = ~nan_df.any(axis=1)

        precip_parts.append(precip[include])

        ' reduce and save to big Series '
        wdr = wdr[include]
        wsp = wsp[include]

        ' rainy hours at CZD, aligned to the retained hours '
        rain_czd = (czd.precip > 0).reindex(wdr.index, fill_value=False)

        wd_parts.append(wdr)
        ws_parts.append(wsp)
        wd_rain_parts.append(wdr[rain_czd])
        ws_rain_parts.append(wsp[rain_czd])

    WD = pd.concat(wd_parts)
    WS = pd.concat(ws_parts)
    WD_rain = pd.concat(wd_rain_parts)
    WS_rain = pd.concat(ws_rain_parts)
    precip_good = pd.concat(precip_parts)

    " compute components "
    # NOTE(review): sin/cos are module-level names not visible here; the
    # constants 180 and 90 below suggest they take degrees -- confirm.
    # The lambdas are kept so the call is always applied per profile.
    WD_sin = WD.apply(lambda x: sin(x))
    WD_cos = WD.apply(lambda x: cos(x))
    U_df = -1 * WS.multiply(WD_sin)
    V_df = -1 * WS.multiply(WD_cos)
    wind_flow_180 = -(U_df * sin(180) + V_df * cos(180))
    wind_flow_90 = U_df * sin(90) + V_df * cos(90)

    " layer-mean"
    # hgt is the height array of the last processed year (0 prepended)
    layer_idx = np.where((hgt >= layer[0]) & (hgt < layer[1]))[0]
    mean_V = wind_flow_180.apply(lambda x: np.nanmean(x[layer_idx]))
    mean_U = wind_flow_90.apply(lambda x: np.nanmean(x[layer_idx]))
    wd_layer = 270 - (np.arctan2(mean_V, mean_U) * 180 / np.pi)
    wd_layer[wd_layer > 360] -= 360
    wd_layer.name = '{:2.0f}-{:2.0f}m'.format(hgt[layer_idx[0]],
                                              hgt[layer_idx[-1]])

    return dict(WD=WD, WS=WS, WD_rain=WD_rain, WS_rain=WS_rain,
                wd_layer=wd_layer, precip=precip,
                precip_good=precip_good)