def read_PRMS(files, datadir, avg_per, std_rolling_mean_window):
    timepers = sorted([f.split('.')[3] for f in files])
    starts = defaultdict()
    ends = defaultdict()
    Std = defaultdict()
    Std_sm = defaultdict()
    data = defaultdict()
    for f in files:
        print(f)
        timeper = f.split('.')[3]
        df, timeper, Ncolumns, header = read_datafile(datadir, f)
        data[timeper] = df  # save data for timeper to dict for later
        # calculate daily means before and after the time gap
        starts, ends = calc_daily_means(df, timeper, timepers, starts, ends, avg_per)
        # calculate daily standard deviations before and after the time gap
        Std[timeper] = df.groupby([lambda x: x.month, lambda x: x.day]).std()
        # smooth standard deviations using rolling mean (moving window)
        # start by extending daily std values by 1/2 of window size
        # (integer division so the values can be used as slice indices)
        insert, append = std_rolling_mean_window // 2, std_rolling_mean_window // 2 - 1
        Std_extended = Std[timeper][-insert:].append(Std[timeper]).append(
            Std[timeper][0:append])
        Std_smoothed = pd.rolling_window(Std_extended, std_rolling_mean_window,
                                         'boxcar', center=True)
        Std_sm[timeper] = Std_smoothed[insert:-append]
    return (data, timepers, starts, ends, Std, Std_sm, Ncolumns, header)
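pd.rolling_window was deprecated in pandas 0.18 and removed in 1.0. Below is a minimal sketch of the same wrap-around padding and centered boxcar smoothing with the current rolling API; the helper name and the even-window assumption are illustrative, not from the original code.

import pandas as pd

def smooth_daily_std_circular(daily_std, window):
    # Centered boxcar smoothing with wrap-around padding.
    # Assumes an even window >= 4, matching the insert/append split above.
    half = window // 2
    extended = pd.concat([daily_std.iloc[-half:], daily_std,
                          daily_std.iloc[:half - 1]])
    smoothed = extended.rolling(window, win_type='boxcar', center=True).mean()
    # drop the padded rows again
    return smoothed.iloc[half:-(half - 1)]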
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    assert_eq(pd.rolling_window(p, 3, win_type='boxcar'),
              dd.rolling_window(d, 3, win_type='boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
def moving_avg_from_csvs(csvs, gcms, window, spinup, function='boxcar', time_units='D'):
    dfs = {}
    for csv in csvs.keys():
        # load csvs into Pandas dataframes; reduce to columns of interest
        df = pd.read_csv(csvs[csv], index_col='Date', parse_dates=True)
        try:
            df = df[gcms]
        except KeyError:
            pass
        # trim spinup time from data to plot
        t0 = df.index[0]
        tspinup = np.datetime64(t0 + pd.DateOffset(years=spinup))
        df = df[tspinup:]
        # resample at daily interval so that time gaps are filled with NaNs
        df = df.resample(time_units)
        # smooth each column with moving average
        df = pd.rolling_window(df, window, function, center=True)
        dfs[csv] = df
    return dfs
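In modern pandas, resample() returns a Resampler object rather than a resampled frame, so the gap-filling and smoothing steps above have to be materialized explicitly. A sketch under that assumption, reusing the names from the function above:

def moving_avg_modern(df, window, function='boxcar'):
    df = df.resample('D').asfreq()  # insert missing days as NaN rows
    # smooth each column with a centered moving window
    return df.rolling(window, win_type=function, center=True).mean()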
def gaussianMovingAverage(pdata, idata, windowSize, StDev):
    precip = pd.rolling_window(pdata.mean(axis=1), window=windowSize,
                               win_type='gaussian', std=StDev)
    end = min(precip.index[-1], idata.index[-1])
    iclip = idata.mean(axis=1)[:end]
    pclip = precip[iclip.index[0]:end:10]
    # np.testing.assert_array_equal(iclip.index, pclip.index)
    return pclip, iclip
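With the current rolling API, the same Gaussian smoothing reads as below; note that win_type parameters such as std are passed to the aggregation call, not to rolling(). A sketch assuming the same pdata DataFrame as above:

def gaussian_moving_average_modern(pdata, windowSize, StDev):
    return (pdata.mean(axis=1)
                 .rolling(window=windowSize, win_type='gaussian')
                 .mean(std=StDev))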
def smooth(options):
    """Smooth the value column of a BED-like file with a rolling window, per chromosome."""
    kwargs = {'center': options.center}
    if options.smooth_function == 'gaussian':
        kwargs.update({'std': options.gs})
    elif options.smooth_function == 'general_gaussian':
        # update rather than reassign, so the 'center' entry is kept
        kwargs.update({'power': options.ggp, 'width': options.ggw})
    elif options.smooth_function == 'kaiser':
        kwargs.update({'beta': options.kb})
    elif options.smooth_function == 'slepian':
        kwargs.update({'width': options.sw})
    df = pd.read_csv(options.infile, sep='\t', index_col=None,
                     names=['chr', 'start', 'end', 'name', 'value', 'strand'],
                     dtype={'strand': object})
    df['aggregate'] = df.groupby(['chr']).apply(
        lambda x: pd.rolling_window(x['value'], options.window_length,
                                    options.smooth_function, **kwargs))
    df.to_csv(options.outfile, index=False, header=False, sep='\t', na_rep='0',
              cols=['chr', 'start', 'end', 'name', 'aggregate', 'strand'],
              float_format='%.2f')
def rolling(self, win_type, window):
    """Calculate a rolling window over all numeric columns.

    :param win_type: The type of window, see pandas pandas.rolling_window.
    :param window: The number of observations used for calculating the window.

    :returns: A BambooFrame of the rolling window calculated for this dataset.
    """
    dframe = self.dframe()[self.schema.numeric_slugs]
    return BambooFrame(rolling_window(dframe, window, win_type))
def rolling(self, win_type, window):
    """Calculate a rolling window over all numeric columns.

    :param win_type: The type of window, see pandas pandas.rolling_window.
    :param window: The number of observations used for calculating the window.

    :returns: A DataFrame of the rolling window calculated for this dataset.
    """
    dframe = self.dframe(QueryArgs(select=self.schema.numerics_select))
    return rolling_window(dframe, window, win_type)
def window(self, block, props):
    '''props must be a list of properties'''
    for i in props:
        vals = pd.rolling_window(self[i].values, window=block,
                                 win_type='boxcar', center=True)
        # keep the original values where the window produced NaNs
        cond = np.argwhere(np.isnan(vals))
        vals[cond] = self[i].values[cond]
        self[i] = vals
    return self
def _compute_sig_diff_ewma(self):
    """
    Internal method to compute the EWMA-filtered time-derivative of the
    smoothed signal.
    """
    windowsize = int(10 * self.smooth_std)  # window spans +/- 5 sigma
    sig_smooth = pd.rolling_window(self.signal, windowsize, 'gaussian',
                                   std=self.smooth_std)
    sig_diff = np.diff(sig_smooth)
    sig_diff_ewma = pd.ewma(sig_diff, halflife=self.halflife, adjust=False,
                            ignore_na=True)
    non_nan_cols = np.nonzero(~np.isnan(sig_diff_ewma))[0]
    self.sig_diff_ewma = sig_diff_ewma[non_nan_cols]
    self.non_nan_cols = non_nan_cols
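pd.ewma was removed along with pd.rolling_window; a sketch of the equivalent EWMA step with the modern Series.ewm accessor (the standalone names mirroring the method above are illustrative only):

import numpy as np
import pandas as pd

def ewma_modern(sig_diff, halflife):
    # exponentially weighted mean of the differenced signal
    return pd.Series(sig_diff).ewm(halflife=halflife, adjust=False,
                                   ignore_na=True).mean().values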
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, "boxcar"), dd.rolling_window(d, 3, "boxcar"))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def rolling_window(self, *args, **kwargs):
    return MySeries(pd.rolling_window(self.x, *args, **kwargs))
# This still needs to be constrained by time of day, i.e., the afternoon periods should be cut
#
# First pull just the data with zero snowpack and non-rain days (I wish we had hourly precip!)
ds = data[:, 3]
no_snow = ds == 0.0
dr = data[:, 2]
no_rain = dr == 0.0
mask = np.logical_and(no_snow, no_rain)
elig_data = data[mask]
df = pd.DataFrame(elig_data[:, :], columns=['date', 'discharge', 'precipitation', 'snow'])
# The following data are very noisy, try smoothing it out, pandas has lots of options to
# experiment with
ser = pd.Series(df['discharge'])
roll = pd.rolling_mean(ser, 300)
hamming = pd.rolling_window(ser, 1000, 'hamming')
df['hamming'] = hamming
df['rolling'] = roll
# Plot smoothed data for inspection
# fig, ax = plt.subplots(1, figsize=(15, 5))
# ax.plot(df['date'], df['hamming'], 'g', label='Hamming Discharge (cfs) Window = 1000')
# ax.plot(df['date'], df['rolling'], 'r', label='Rolling Mean Discharge (cfs) Window = 300')
# ax.plot(df['date'], df['discharge'], 'b', label='Measured Discharge (cfs)', alpha=0.3)
# ax.set_ylabel('Discharge (cfs)', color='k')
# ax.set_xlabel('Date')
# # plt.ylim(0.0, 1.0)
# for tl in ax.get_yticklabels():
#     tl.set_color('b')
# plt.title('Summer Hydrograph')
# plt.legend()
def plot_moving_avg_minmax(csvs, cols, timeunits, window, function, title, ylabel,
                           colors, spinup, Synthetic_timepers):
    # csvs = list of csv files with multi-column timeseries
    # cols = list of column names to include in plot
    # timeunits = Pandas time units (e.g. 'D' for days)
    # window = width of moving avg window in timeunits
    # function = moving avg. fn to use (see Pandas doc)
    # title = plot title; ylabel = y-axis label
    # spinup = length of time (years) to trim off start of results while the model is 'spinning up'

    # initialize plot
    fig = plt.figure()
    hatches = ["", "|", "-", ""]
    transp = [.3, .3, .3, .3]
    for i in range(len(csvs)):
        # load csvs into Pandas dataframes; reduce to columns of interest
        df = pd.read_csv(csvs[i], index_col='Date', parse_dates=True)
        try:
            df = df[cols]
        except KeyError:
            pass
        # trim spinup time from data to plot
        t0 = df.index[0]
        tspinup = np.datetime64(t0 + pd.DateOffset(years=spinup))
        df = df[tspinup:]
        # resample at daily interval so that time gaps are filled with NaNs
        df_rs = df.resample('D')
        # smooth each column with moving average
        smoothed = pd.rolling_window(df_rs, window, function, center=True)
        # plot out mean, max and min
        scenario = os.path.split(csvs[i])[1].split('.')[1]
        try:
            ax = smoothed.mean(axis=1).plot(color=colors[i], label=scenario)
        except TypeError:
            print("Problem plotting timeseries. Check that a spinup value was not "
                  "entered for plotting after spinup results were already discarded "
                  "during aggregation.")
        ax.fill_between(smoothed.index, smoothed.max(axis=1), smoothed.min(axis=1),
                        alpha=transp[i], color=colors[i], edgecolor='k', linewidth=0.25)

    # more plot settings
    wrap = 60
    title = "\n".join(textwrap.wrap(title, wrap))  # wrap title
    plt.subplots_adjust(top=0.85)
    ax.set_title(title)
    ax.grid(False)
    handles, labels = ax.get_legend_handles_labels()
    if len(Synthetic_timepers) > 0:
        handles.append(plt.Rectangle((0, 0), 1, 1, color='0.9', linewidth=2))
        labels.append('synthetic input data')
    ax.legend(handles, labels, title='Emissions scenarios', loc='best')
    ax.set_ylabel(ylabel)
    ax.ticklabel_format(style='sci', axis='y', scilimits=(-3, 3))
    window_yr = window / 365.0
    ax.set_xlabel('Center of %s year moving window' % (window_yr))

    # shade periods for which synthetic data were generated
    if len(Synthetic_timepers) > 0:
        for per in Synthetic_timepers:
            tstart, tend = list(map(int, per.split('-')))
            daterange = pd.date_range(start=dt.datetime(tstart, 1, 1),
                                      end=dt.datetime(tend, 1, 1))
            # make vectors of ymax values and ymin values for length of daterange
            ymax = np.ones(len(daterange)) * plt.ylim()[1]
            ymin = np.ones(len(daterange)) * plt.ylim()[0]
            syn = ax.fill_between(daterange, ymax, ymin, color='0.9', zorder=0)
Thanks mchan on freenode ##machine-learning for guiding me on rolling window and such
'''
from sklearn import tree, linear_model, neighbors, cross_validation
import pandas as pd
import numpy

data_labels = ["Happiness", "Motivation", "Flexibility", "Strength", "Endurance", "Relationships"]
data_frame = pd.read_csv("personal_stats2.csv")
data_frame = data_frame[data_labels + ["Datetime"]]

# Apply rolling window to data
rolling_window_size = 16
series = data_frame.set_index('Datetime')
series = pd.rolling_window(series, rolling_window_size, 'boxcar')
data_frame = pd.DataFrame(series, columns=data_labels)

# Get 80% of our dataset
index_at_80_percent = int(len(data_frame) * .8)

# Get the first 80% as input and the following day as the target result.
# Skip the first rolling_window_size rows, where the window didn't apply.
training_input = data_frame[rolling_window_size:index_at_80_percent]
training_target = data_frame[rolling_window_size + 1:index_at_80_percent + 1]

#=============================================================================
# Uncomment to select a method
#=============================================================================
# Score: 437 with 'blackman' rolling window
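A hypothetical continuation showing how one of the imported models could be fit and scored on the windowed features; the original selects a method in the commented block above, so the model choice and hold-out split below are illustrative only:

model = neighbors.KNeighborsRegressor()
model.fit(training_input, training_target)
# score on the held-out 20%, again predicting the following day
testing_input = data_frame[index_at_80_percent:-1]
testing_target = data_frame[index_at_80_percent + 1:]
print(model.score(testing_input, testing_target))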
def calc_srad_hum_it(df, tol=0.01, win_type='boxcar'):
    """
    TODO
    """
    window = np.zeros(n_days + 90)
    t_fmax = np.zeros(n_days)
    df['s_tfmax'] = 0.0

    df['t_max'] = np.maximum(df['t_max'], df['t_min'])
    dtr = df['t_max'] - df['t_min']
    sm_dtr = pd.rolling_window(dtr, window=30, freq='D',
                               win_type=win_type).fillna(method='bfill')
    if n_days <= 30:
        print('Timeseries is shorter than rolling mean window, filling')
        print('missing values with unsmoothed data')
        sm_dtr.fillna(dtr, inplace=True)

    sum_precip = df['s_precip'].values.sum()
    ann_precip = (sum_precip / n_days) * consts['DAYS_PER_YEAR']
    if ann_precip == 0.0:
        ann_precip = 1.0

    if n_days <= 90:
        sum_precip = df['s_precip'].values.sum()
        eff_ann_precip = (sum_precip / n_days) * consts['DAYS_PER_YEAR']
        eff_ann_precip = np.maximum(eff_ann_precip, 8.0)
        parray = eff_ann_precip
    else:
        parray = np.zeros(n_days)
        start_yday = df['day_of_year'][0]
        end_yday = df['day_of_year'][-1]
        isloop = False  # (initialized here to avoid a NameError when neither test holds)
        if start_yday != 1:
            if end_yday == start_yday - 1:
                isloop = True
        else:
            if end_yday == 365 or end_yday == 366:
                isloop = True
        if isloop:
            for i in range(90):
                window[i] = df['s_precip'][n_days - 90 + i]
        else:
            for i in range(90):
                window[i] = df['s_precip'][i]
        window[90:] = df['s_precip']

        for i in range(n_days):
            sum_precip = 0.0
            for j in range(90):
                sum_precip += window[i + j]
            sum_precip = (sum_precip / 90.) * consts['DAYS_PER_YEAR']
            sum_precip = np.maximum(sum_precip, 8.0)
            parray[i] = sum_precip

    # FIXME: This is still bad form
    tt_max0, flat_potrad, slope_potrad, daylength, tiny_rad_fract = calc_solar_geom()
    # NOTE: Be careful with this one!
    disaggregate.tiny_rad_fract = tiny_rad_fract

    avg_horizon = (params['site_east_horiz'] + params['site_west_horiz']) / 2.0
    horizon_scalar = 1.0 - np.sin(avg_horizon * consts['RADPERDEG'])
    if params['site_slope'] > avg_horizon:
        slope_excess = params['site_slope'] - avg_horizon
    else:
        slope_excess = 0.
    if 2.0 * avg_horizon > 180.:
        slope_scalar = 0.
    else:
        slope_scalar = np.clip(
            1. - (slope_excess / (180.0 - 2.0 * avg_horizon)), 0, None)
    sky_prop = horizon_scalar * slope_scalar

    b = params['B0'] + params['B1'] * np.exp(-params['B2'] * sm_dtr)
    t_fmax = 1.0 - 0.9 * np.exp(-b * np.power(dtr, params['C']))
    inds = np.nonzero(df['precip'] > options['SW_PREC_THRESH'])[0]
    t_fmax[inds] *= params['RAIN_SCALAR']
    df['s_tfmax'] = t_fmax

    tdew = df.get('tdew', df['s_t_min'])
    pva = df['s_hum'] if 's_hum' in df else svp(tdew)
    pa = atm_pres(params['site_elev'])
    yday = df['day_of_year'] - 1
    df['s_dayl'] = daylength[yday]
    tdew_save = tdew
    pva_save = pva

    # FIXME: This function has lots of inputs and outputs
    tdew, pet = _compute_srad_humidity_onetime(tdew, pva, tt_max0, flat_potrad,
                                               slope_potrad, sky_prop, daylength,
                                               parray, pa, dtr, df)
    sum_pet = pet.values.sum()
    ann_pet = (sum_pet / n_days) * consts['DAYS_PER_YEAR']

    # FIXME: Another really long conditional
    if (('tdew' in df) or ('s_hum' in df) or
            (options['VP_ITER'].upper() == 'VP_ITER_ANNUAL' and
             ann_pet / ann_precip >= 2.5)):
        tdew = tdew_save[:]
        pva = pva_save[:]

    # FIXME: Another really long conditional
    # if (options['VP_ITER'].upper() == 'VP_ITER_ALWAYS' or
    #         (options['VP_ITER'].upper() == 'VP_ITER_ANNUAL' and
    #          ann_pet / ann_precip >= 2.5) or
    #         options['VP_ITER'].upper() == 'VP_ITER_CONVERGE'):
    #     if options['VP_ITER'].upper() == 'VP_ITER_CONVERGE':
    #         max_iter = 100
    #     else:
    #         max_iter = 2
    # else:
    #     max_iter = 1

    # FIXME: Still want to reduce the number of args here
    # FIXME: This also takes up the majority of the mtclim runtime
    rmse_tdew = tol + 1
    # f = lambda x: rmse(_compute_srad_humidity_onetime(x, pva, tt_max0, flat_potrad,
    #                                                   slope_potrad, sky_prop, daylength,
    #                                                   parray, pa, dtr, df)[0], tdew)

    def f(x):
        tdew_calc = _compute_srad_humidity_onetime(x, pva, tt_max0, flat_potrad,
                                                   slope_potrad, sky_prop,
                                                   daylength, parray, pa,
                                                   dtr, df)[0]
        print(tdew_calc - tdew)
        err = rmse(tdew_calc, tdew)
        print(err)
        return err

    res = minimize(f, tdew, tol=rmse_tdew)
    tdew = res.x
    pva = svp(tdew)
    if 's_hum' not in df:
        df['s_hum'] = pva
    pvs = svp(df['s_t_day'])
    vpd = pvs - pva
    df['s_vpd'] = np.maximum(vpd, 0.)
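The freq= argument of pd.rolling_window is gone in modern pandas, so the 30-day smoothing of the diurnal temperature range above would now be written in two steps. A sketch assuming dtr is a datetime-indexed Series:

def smooth_dtr_modern(dtr, win_type='boxcar'):
    daily = dtr.resample('D').mean()  # regularize to daily frequency first
    # trailing 30-day window, backfilling the start as above
    return daily.rolling(window=30, win_type=win_type).mean().bfill()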
def rolling_smoother(self, data, stype='rolling_mean', win_size=10, win_type='boxcar',
                     center=False, std=0.1, beta=0.1, power=1, width=1):
    """
    Perform a rolling smoothing on the data. For complete help refer to
    http://pandas.pydata.org/pandas-docs/dev/computation.html

    :param data:
    :param stype:
    :param win_size:
    :param win_type:
    :param center:
    :param std:
    :param beta:
    :param power:
    :param width:

    Smoothing types:

    ROLLING:
        rolling_count   Number of non-null observations
        rolling_sum     Sum of values
        rolling_mean    Mean of values
        rolling_median  Arithmetic median of values
        rolling_min     Minimum
        rolling_max     Maximum
        rolling_std     Unbiased standard deviation
        rolling_var     Unbiased variance
        rolling_skew    Unbiased skewness (3rd moment)
        rolling_kurt    Unbiased kurtosis (4th moment)
        rolling_window  Moving window function

    window types:
        boxcar
        triang
        blackman
        hamming
        bartlett
        parzen
        bohman
        blackmanharris
        nuttall
        barthann
        kaiser (needs beta)
        gaussian (needs std)
        general_gaussian (needs power, width)
        slepian (needs width)
    """
    if stype == 'count':
        newy = pd.rolling_count(data, win_size)
    elif stype == 'sum':
        newy = pd.rolling_sum(data, win_size)
    elif stype == 'mean':
        newy = pd.rolling_mean(data, win_size)
    elif stype == 'median':
        newy = pd.rolling_median(data, win_size)
    elif stype == 'min':
        newy = pd.rolling_min(data, win_size)
    elif stype == 'max':
        newy = pd.rolling_max(data, win_size)
    elif stype == 'std':
        newy = pd.rolling_std(data, win_size)
    elif stype == 'var':
        newy = pd.rolling_var(data, win_size)
    elif stype == 'skew':
        newy = pd.rolling_skew(data, win_size)
    elif stype == 'kurt':
        newy = pd.rolling_kurt(data, win_size)
    elif stype == 'window':
        # elif chain so the default branch no longer overwrites the
        # kaiser/gaussian results (the original if/if/else did)
        if win_type == 'kaiser':
            newy = pd.rolling_window(data, win_size, win_type, center=center,
                                     beta=beta)
        elif win_type == 'gaussian':
            newy = pd.rolling_window(data, win_size, win_type, center=center,
                                     std=std)
        elif win_type == 'general_gaussian':
            newy = pd.rolling_window(data, win_size, win_type, center=center,
                                     power=power, width=width)
        else:
            newy = pd.rolling_window(data, win_size, win_type, center=center)
    return newy
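The per-window extra parameters documented above (std, beta, power/width, width) are the detail most callers trip over. A runnable sketch of the same calls with the modern API on synthetic data (scipy must be installed for the weighted win_type windows):

import numpy as np
import pandas as pd

y = pd.Series(np.sin(np.linspace(0, 6, 200)) + 0.1 * np.random.randn(200))
box = y.rolling(15, center=True).mean()  # plain moving average (boxcar)
gauss = y.rolling(15, win_type='gaussian', center=True).mean(std=2.0)
kaiser = y.rolling(15, win_type='kaiser', center=True).mean(beta=14)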
plt.plot(w1, newspeclist[i] + yoffset)  # , label=datetimelist[i].iso[0:10])
yoffset = yoffset + 1
# plt.legend()
plt.show()

# singular value decomposition
svd = pyasl.SVD()
svd.decompose(newspeclist[0], m)
bflist = []
bfsmoothlist = []
for i in range(0, nspec):
    # Obtain the broadening function
    bf = svd.getBroadeningFunction(newspeclist[i])  # this one is like a matrix
    bfarray = svd.getBroadeningFunction(newspeclist[i], asarray=True)
    # Smooth the array-like broadening function
    bfsmooth = amp * pd.rolling_window(bfarray, window=5, win_type='gaussian',
                                       std=1.5, center=True)
    # The rolling window leaves NaNs at the start; zero them out
    for j in range(0, len(bfsmooth)):
        if np.isnan(bfsmooth[j]):
            bfsmooth[j] = 0
    bflist.append(bf)
    bfsmoothlist.append(bfsmooth)

# Obtain the indices in RV space that correspond to the BF
bf_ind = svd.getRVAxis(r, 1)

# plot the smoothed BFs
# this plot is boring, skip it
# plt.axis([-100, 70, -0.2, 12])
# plt.xlabel('Velocity (km s$^{-1}$)')
# plt.ylabel('Broadening Function (arbitrary amplitude)')
# yoffset = 0.0
# see http://stackoverflow.com/a/27626699 for the calculation shenanigans
add_cls['Consecutive Up Days'] = ((add_cls['Daily Change'] - add_cls['Daily Change'].shift()) > 0) \
    .apply(lambda y: y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1))

# every object in the add_cls dictionary is a data frame,
# so turn it into a panel and join it with the original
Q = PNL.join(pd.Panel(add_cls))

# this section is for functions that return panels;
# they join with a simple panel.join call
Q = Q.join(PNL.pct_change(periods=1), how='inner', rsuffix=' Pct Change')

# this one handles cases when a panel hasn't been fitted to some built-in yet
# (this would probably be the function to wrap in a GenericWrapper for pipeline stuff)
Q = Q.join(Q.apply(lambda x: pd.rolling_window(x, 5, 'gaussian', std=0.1)),
           rsuffix=' Gaussian Mean')

# lag a few days
NUM_LAG_DAYS = 3
Q = Q.join([Q.shift(k).add_suffix(' Lag ' + str(k)) for k in range(1, NUM_LAG_DAYS + 1)])

# add some rolling correlation between series
Q = Q.join(pd.rolling_corr(Q['Daily Change'], pairwise=True, window=5).transpose(2, 0, 1))

# add some rolling std
Q = Q.join(pd.Panel({'rolling std': pd.rolling_std(Q['Daily Change'], 5)}))
Q = Q.join(Q.apply(lambda x: pd.rolling_std(x, 5)), rsuffix=' rolling std')

print(Q.items.tolist())
i_data = utils.get_data(datadir, ifile, delta)
inun_data = i_data["data"]
"""
delta = utils.get_data(datadir, datafile, deltaname)
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1)
locations = delta['data'].iloc[:, 0]
y = (1.2 * locations) + (.3 * np.random.randn(len(locations)))
print locations
print stats.pearsonr(locations, y)
plt.savefig('plot.png')
"""

################################
precip = pd.rolling_window(precip_data.mean(axis=1), window=45,
                           win_type="gaussian", std=39)
end = min(precip.index[-1], inun_data.index[-1])
iclip = inun_data.mean(axis=1)[:end]
pclip = precip[iclip.index[0]:end:10]
mask = np.isfinite(iclip) & np.isfinite(pclip)
################################

# print alldeltas['inundation']['Ganges']
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1)
# locations = alldeltas['precip']['Ganges']
# ax1.plot(locations)
# print stats.pearsonr(locations, y)
df2.head()
df.drop(['vobs', 'vroms'], axis=1).plot(figsize=(8, 12), subplots=True)
df2.drop(['vobs', 'vroms'], axis=1).plot(figsize=(8, 12), subplots=True)
df['uromsday'] = df2['uroms']
df.head()
df.head(40)
df3 = df.dropna()
df3
df3.uroms.plot()
df3.uromsdat.plot()
df3.uromsday.plot()
df3.uromsday.plot()
df.uroms.plot()
df3.uromsday.plot(linewidth=3)
get_ipython().set_next_input(u"df['ufilt'] = pd.rolling_window"); get_ipython().magic(u'pinfo pd.rolling_window')
df['ufilt'] = pd.rolling_window(window_type='hanning', window_size=24)
df['ufilt'] = pd.rolling_window(df['uroms'], window_type='hanning', window_size=24)
df['ufilt'] = pd.rolling_window(df['uroms'], window_type='hanning', window=24)
df['ufilt'] = pd.rolling_window(df['uroms'], window_type='hamming', window=24)
get_ipython().set_next_input(u"df['ufilt'] = pd.rolling_window"); get_ipython().magic(u'pinfo pd.rolling_window')
df['ufilt'] = pd.rolling_window(df['uroms'], win_type='hamming', window=24)
df3.ufilt.plot(linewidth=3)
df.ufilt.plot(linewidth=3)
df['ufilt40'] = pd.rolling_window(df['uroms'], win_type='hamming', window=40)
df.ufilt40.plot(linewidth=3)
plt.legend()
df.to_csv('roms_vs_obs_filters.csv', sep='\t')
df.describe()
df.describe().T
filename = '/home/phellipe/Desktop/uerj-pythoncourse-20150629/sandbox/eduardorichard/A_SED.txt'
df = pd.read_csv(filename, sep='\t')
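The kwarg the session above was hunting for is win_type; with the modern API the same Hamming filter reads as follows (a sketch, same df as in the session):

df['ufilt'] = df['uroms'].rolling(window=24, win_type='hamming').mean()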
def calc_srad_humidity_iterative(self, tol=0.01, win_type='boxcar'):
    '''Iterative estimation of shortwave radiation and humidity'''
    ndays = self.ndays
    daylength = np.zeros(366)
    window = np.zeros(ndays + 90)
    tinystepspday = int(86400 / constants['SRADDT'])
    tiny_radfract = np.zeros((366, tinystepspday))
    ttmax0 = np.zeros(366)
    flat_potrad = np.zeros(366)
    slope_potrad = np.zeros(366)
    t_fmax = np.zeros(ndays)
    self.data['s_tfmax'] = 0.

    # calculate diurnal temperature range for transmittance calculations
    self.data['tmax'] = np.maximum(self.data['tmax'], self.data['tmin'])
    dtr = self.data['tmax'] - self.data['tmin']

    # smooth dtr array: After Bristow and Campbell, 1984
    # use 30-day antecedent smoothing window
    sm_dtr = pd.rolling_window(dtr, window=30, freq='D',
                               win_type=win_type).fillna(method='bfill')
    if self.ndays <= 30:
        warn('Timeseries is shorter than rolling mean window, filling '
             'missing values with unsmoothed data.')
        sm_dtr.fillna(dtr, inplace=True)

    # calculate the annual total precip
    sum_prcp = self.data['s_prcp'].values.sum()
    ann_prcp = (sum_prcp / self.ndays) * 365.25
    if ann_prcp == 0.:
        ann_prcp = 1.0

    # Generate the effective annual precip, based on a 3-month
    # moving-window. Requires some special case handling for the
    # beginning of the record and for short records.
    # check if there are at least 90 days in this input file, if not,
    # use a simple total scaled to effective annual precip
    if ndays < 90:
        sum_prcp = self.data['s_prcp'].values.sum()
        effann_prcp = (sum_prcp / self.ndays) * 365.25
        # if the effective annual precip for this period
        # is less than 8 cm, set the effective annual precip to 8 cm
        # to reflect an arid condition, while avoiding possible
        # division-by-zero errors and very large ratios (PET/Pann)
        effann_prcp = np.maximum(effann_prcp, 8.)
        parray = effann_prcp
    else:
        # Check if the yeardays at beginning and the end of this input file
        # match up. If so, use parts of the three months at the end
        # of the input file to generate effective annual precip for
        # the first 3-months. Otherwise, duplicate the first 90 days
        # of the record.
        parray = np.zeros(ndays)  # (initialized here so parray[i] below is valid)
        start_yday = self.data.index.dayofyear[0]
        end_yday = self.data.index.dayofyear[ndays - 1]
        isloop = False  # (initialized to avoid a NameError when neither test holds)
        if start_yday != 1:
            if end_yday == start_yday - 1:
                isloop = True
        else:
            if end_yday == 365 or end_yday == 366:
                isloop = True

        # fill the first 90 days of window
        for i in range(90):
            if isloop:
                window[i] = self.data['s_prcp'][ndays - 90 + i]
            else:
                window[i] = self.data['s_prcp'][i]
        # fill the rest of the window array
        window[90:] = self.data['s_prcp']

        # for each day, calculate the effective annual precip from
        # scaled 90-day total
        for i in range(self.ndays):
            sum_prcp = 0.
            for j in range(90):
                sum_prcp += window[i + j]
            sum_prcp = (sum_prcp / 90.) * 365.25
            # if the effective annual precip for this 90-day period
            # is less than 8 cm, set the effective annual precip to 8 cm
            # to reflect an arid condition, while avoiding possible
            # division-by-zero errors and very large ratios (PET/Pann)
            if sum_prcp < 8.:
                sum_prcp = 8.
            parray[i] = sum_prcp

    # start of the main radiation algorithm
    # before starting the iterative algorithm between humidity and
    # radiation, calculate all the variables that don't depend on
    # humidity so they only get done once.
    # STEP (1) calculate pressure ratio (site/reference) = f(elevation)
    t1 = 1.0 - (constants['LR_STD'] * self.parameters['site_elev']) / \
        constants['T_STD']
    t2 = constants['G_STD'] / (constants['LR_STD'] *
                               (constants['R'] / constants['MA']))
    pratio = np.power(t1, t2)

    # STEP (2) correct initial transmittance for elevation
    trans1 = np.power(self.parameters['TBASE'], pratio)

    # STEP (3) build 366-day array of ttmax0, potential rad, and daylength
    # precalculate the transcendentals
    lat = self.parameters['site_lat']
    # check for (+/-) 90 degrees latitude, throws off daylength calc
    lat *= constants['RADPERDEG']
    if lat > np.pi / 2.:
        lat = np.pi / 2.
    if lat < -np.pi / 2.:
        lat = -np.pi / 2.
    coslat = np.cos(lat)
    sinlat = np.sin(lat)
    cosslp = np.cos(self.parameters['site_slope'] * constants['RADPERDEG'])
    sinslp = np.sin(self.parameters['site_slope'] * constants['RADPERDEG'])
    cosasp = np.cos(self.parameters['site_aspect'] * constants['RADPERDEG'])
    sinasp = np.sin(self.parameters['site_aspect'] * constants['RADPERDEG'])
    # cosine of zenith angle for east and west horizons
    coszeh = np.cos(np.pi / 2. -
                    (self.parameters['site_east_horiz'] * constants['RADPERDEG']))
    coszwh = np.cos(np.pi / 2. -
                    (self.parameters['site_west_horiz'] * constants['RADPERDEG']))

    # sub-daily time and angular increment information
    dt = constants['SRADDT']  # set timestep
    dh = dt / constants['SECPERRAD']  # calculate hour-angle step

    # begin loop through yeardays
    for i in range(365):
        # calculate cos and sin of declination
        decl = constants['MINDECL'] * np.cos(
            (i + constants['DAYSOFF']) * constants['RADPERDAY'])
        cosdecl = np.cos(decl)
        sindecl = np.sin(decl)

        # do some precalculations for beam-slope geometry (bsg)
        bsg1 = -sinslp * sinasp * cosdecl
        bsg2 = (-cosasp * sinslp * sinlat + cosslp * coslat) * cosdecl
        bsg3 = (cosasp * sinslp * coslat + cosslp * sinlat) * sindecl

        # calculate daylength as a function of lat and decl
        cosegeom = coslat * cosdecl
        sinegeom = sinlat * sindecl
        coshss = -sinegeom / cosegeom
        if coshss < -1.0:
            coshss = -1.0  # 24-hr daylight
        if coshss > 1.0:
            coshss = 1.0  # 0-hr daylight
        # arccos recovers the hour angle at sunset (radians) from its cosine
        hss = np.arccos(coshss)
        # daylength (seconds)
        daylength[i] = 2.0 * hss * constants['SECPERRAD']
        if daylength[i] > 86400:
            daylength[i] = 86400

        # solar constant as a function of yearday (W/m^2)
        sc = 1368.0 + 45.5 * np.sin((2.0 * np.pi * i / 365.25) + 1.7)
        # extraterrestrial radiation perpendicular to beam, total over
        # the timestep (J)
        dir_beam_topa = sc * dt

        sum_trans = 0.
        sum_flat_potrad = 0.
        sum_slope_potrad = 0.
        # begin sub-daily hour-angle loop, from -hss to hss
        for h in np.arange(-hss, hss, dh):
            # precalculate cos and sin of hour angle
            cosh = np.cos(h)
            sinh = np.sin(h)
            # calculate cosine of solar zenith angle
            cza = cosegeom * cosh + sinegeom
            # calculate cosine of beam-slope angle
            cbsa = sinh * bsg1 + cosh * bsg2 + bsg3
            # check if sun is above a flat horizon
            if cza > 0.:
                # when sun is above the ideal (flat) horizon, do all the
                # flat-surface calculations to determine daily total
                # transmittance, and save flat-surface potential radiation
                # for later calculations of diffuse radiation

                # potential radiation for this time period, flat surface,
                # top of atmosphere
                dir_flat_topa = dir_beam_topa * cza

                # determine optical air mass
                am = 1.0 / (cza + 0.0000001)
                if am > 2.9:
                    # (arccos converts the cosine of the zenith angle to degrees)
                    ami = int(np.arccos(cza) / constants['RADPERDEG']) - 69
                    if ami < 0:
                        ami = 0
                    if ami > 20:
                        ami = 20
                    am = constants['OPTAM'][ami]

                # correct instantaneous transmittance for this optical
                # air mass
                trans2 = np.power(trans1, am)

                # instantaneous transmittance is weighted by potential
                # radiation for flat surface at top of atmosphere to get
                # daily total transmittance
                sum_trans += trans2 * dir_flat_topa

                # keep track of total potential radiation on a flat
                # surface for ideal horizons
                sum_flat_potrad += dir_flat_topa

                # keep track of whether this time step contributes to
                # component 1 (direct on slope)
                if ((h < 0. and cza > coszeh and cbsa > 0.) or
                        (h >= 0. and cza > coszwh and cbsa > 0.)):
                    # sun between east and west horizons, and direct on
                    # slope. this period contributes to component 1
                    sum_slope_potrad += dir_beam_topa * cbsa
            else:
                dir_flat_topa = -1

            # (int() so the result can index tiny_radfract)
            tinystep = int((12 * 3600 + h * constants['SECPERRAD']) /
                           constants['SRADDT'])
            if tinystep < 0:
                tinystep = 0
            if tinystep > tinystepspday - 1:
                tinystep = tinystepspday - 1
            if dir_flat_topa > 0:
                tiny_radfract[i][tinystep] = dir_flat_topa
            else:
                tiny_radfract[i][tinystep] = 0

        if daylength[i] and sum_flat_potrad > 0:
            tiny_radfract[i] /= sum_flat_potrad

        # calculate maximum daily total transmittance and daylight average
        # flux density for a flat surface and the slope
        if daylength[i]:
            ttmax0[i] = sum_trans / sum_flat_potrad
            flat_potrad[i] = sum_flat_potrad / daylength[i]
            slope_potrad[i] = sum_slope_potrad / daylength[i]
        else:
            ttmax0[i] = 0.
            flat_potrad[i] = 0.
            slope_potrad[i] = 0.

    # force yearday 366 = yearday 365
    ttmax0[365] = ttmax0[364]
    flat_potrad[365] = flat_potrad[364]
    slope_potrad[365] = slope_potrad[364]
    daylength[365] = daylength[364]
    tiny_radfract[365] = tiny_radfract[364]

    # STEP (4) calculate the sky proportion for diffuse radiation
    # uses the product of spherical cap defined by average horizon angle
    # and the great-circle truncation of a hemisphere. this factor does not
    # vary by yearday.
    avg_horizon = (self.parameters['site_east_horiz'] +
                   self.parameters['site_west_horiz']) / 2.0
    horizon_scalar = 1.0 - np.sin(avg_horizon * constants['RADPERDEG'])
    if self.parameters['site_slope'] > avg_horizon:
        slope_excess = self.parameters['site_slope'] - avg_horizon
    else:
        slope_excess = 0.
    if 2.0 * avg_horizon > 180.:
        slope_scalar = 0.
    else:
        slope_scalar = 1.0 - (slope_excess / (180.0 - 2.0 * avg_horizon))
    if slope_scalar < 0.:
        slope_scalar = 0.
    sky_prop = horizon_scalar * slope_scalar

    # b parameter, and t_fmax not varying with Tdew, so these can be
    # calculated once, outside the iteration between radiation and humidity
    # estimates. Requires storing t_fmax in an array.
    # b parameter from 30-day average of DTR
    b = self.parameters['B0'] + self.parameters['B1'] * \
        np.exp(-self.parameters['B2'] * sm_dtr)

    # proportion of daily maximum transmittance
    t_fmax = 1.0 - 0.9 * np.exp(-b * np.power(dtr, self.parameters['C']))

    # correct for precipitation if this is a rain day
    inds = np.nonzero(self.data['prcp'] > self.options['SW_PREC_THRESH'])[0]
    t_fmax[inds] *= self.parameters['RAIN_SCALAR']
    self.data['s_tfmax'] = t_fmax

    # Initial values of vapor pressure, etc
    if 'tdew' in self.data:
        # Observed Tdew supplied
        tdew = self.data['tdew']
    else:
        # Estimate Tdew
        tdew = self.data['s_tmin']
    if 's_hum' in self.data:
        # Observed vapor pressure supplied
        pva = self.data['s_hum']
    else:
        # convert dewpoint to vapor pressure
        pva = svp(tdew)

    # Other values needed for srad_humidity calculation
    pa = atm_pres(self.parameters['site_elev'])
    yday = self.data.index.dayofyear - 1
    self.data['s_dayl'] = daylength[yday]
    tdew_save = tdew
    pva_save = pva

    # Initial estimates of solar radiation, cloud fraction, etc.
    tdew, pva, pet = self._compute_srad_humidity_onetime(
        tdew, pva, ttmax0, flat_potrad, slope_potrad, sky_prop, daylength,
        parray, pa, dtr)

    # estimate annual PET
    sum_pet = pet.values.sum()
    ann_pet = (sum_pet / self.ndays) * 365.25

    # Reset humidity terms if no iteration desired
    if (('tdew' in self.data) or ('s_hum' in self.data) or
            (self.options['VP_ITER'].upper() == 'VP_ITER_ANNUAL' and
             ann_pet / ann_prcp >= 2.5)):
        tdew = tdew_save[:]
        pva = pva_save[:]

    # Set up srad-humidity iterations
    if (self.options['VP_ITER'].upper() == 'VP_ITER_ALWAYS' or
            (self.options['VP_ITER'].upper() == 'VP_ITER_ANNUAL' and
             ann_pet / ann_prcp >= 2.5) or
            self.options['VP_ITER'].upper() == 'VP_ITER_CONVERGE'):
        if self.options['VP_ITER'].upper() == 'VP_ITER_CONVERGE':
            max_iter = 100
        else:
            max_iter = 2
    else:
        max_iter = 1

    # srad-humidity iterations
    iter_i = 1
    rmse_tdew = tol + 1
    while rmse_tdew > tol and iter_i < max_iter:
        tdew_save = tdew[:]
        tdew, pva, pet = self._compute_srad_humidity_onetime(
            tdew, pva, ttmax0, flat_potrad, slope_potrad, sky_prop, daylength,
            parray, pa, dtr)
        rmse_tdew = 0
        for i in range(self.ndays):
            # use rmse function and vectorize
            rmse_tdew += (tdew[i] - tdew_save[i]) * (tdew[i] - tdew_save[i])
        rmse_tdew /= self.ndays
        rmse_tdew = np.power(rmse_tdew, 0.5)
        iter_i += 1

    # save humidity in output data structure
    if 's_hum' not in self.data:
        self.data['s_hum'] = pva

    # output humidity as vapor pressure deficit (Pa)
    # calculate saturated VP at tday
    pvs = svp(self.data['s_tday'])
    vpd = pvs - pva
    self.data['s_vpd'] = np.maximum(vpd, 0.)
assert ts_1min.index.freq == pd.tseries.offsets.Minute()
ts_downsampled_15min = ts_1min.resample('15min', closed='right', label='right').mean()

# Test consistency to input data when downsampling
pd.testing.assert_series_equal(ts_downsampled_15min, data_15min['noisy_sin'])
data_1h = data_15min.resample('1h', closed='right', label='right').mean()

#%%
fig, ax = plt.subplots()
sin_1min.plot(ax=ax)
sin_1min_smooth = pd.rolling_window(sin_1min, window=20, win_type='cosine',
                                    center=True, min_periods=1)
sin_1min_smooth.plot(ax=ax, linestyle='--')

a = sin_1min.iloc[60:-61]
b = sin_1min_smooth.iloc[60:-61]
r = b - a
ar = r / a
rmse = np.sqrt(np.mean(r**2))
rel_rmse = rmse / a.mean()
rel_bias = r.mean() / a.mean()
rel_mae = r.abs().mean() / a.mean()
print("BIAS: {0:%}  RMSE: {1:%}  MAE: {2:%}".format(rel_bias, rel_rmse, rel_mae))
# pd.testing.assert_series_equal(a, b)
def computeModulo(inputData, window):  # (function header restored from the call below)
    data = []
    time = []
    oldValue = [0, 0, 0]
    initialTime = float(inputData[0][0])
    step = 0
    total = 0
    for line in inputData:
        modulus = (abs(line[1] - oldValue[0]) + abs(line[2] - oldValue[1]) +
                   abs(line[3] - oldValue[2]))
        oldValue[0] = line[1]
        oldValue[1] = line[2]
        oldValue[2] = line[3]
        step += 1
        total += modulus
        if step >= window:
            data += [total]
            time += [datetime.datetime.fromtimestamp((line[0] - initialTime) / 1000.0)]
            step = 0
            total = 0
    return (time, data)

inputData = getDataFromFile('../../Desktop/gunicorn.log', 'ettore')
(time, data) = computeModulo(inputData, 1)
ts = pd.Series(data=data, index=time)
ts = pd.rolling_window(ts, window=1000, win_type='triang')
ts.plot(style='c-')
# ts = ts.interpolate(method='time')
plt.show()