def predict(self, data=None):
    if self.use_period:
        # decomfreq = freq
        res = sm.tsa.seasonal_decompose(self.data.tolist(),
                                        freq=self.freq, model=self.model)
        # res.plot()
        median_trend = pd.rolling_median(Series(self.data),
                                         window=self.freq, center=True,
                                         min_periods=1)
        resid = res.observed - res.seasonal - median_trend
    else:
        resid = self.data
    random = Series(resid)
    mean_nan = 0
    std_nan = 0
    # random = res.resid
    if self.mode == 'average':
        mean_nan = np.nanmean(random)
        std_nan = np.nanstd(random)
    elif self.mode == 'median':
        rolling_median = pd.rolling_median(random, 3, center=True,
                                           min_periods=1)
        mean_nan = np.nanmean(rolling_median)
        std_nan = np.nanstd(rolling_median)
    min_val = mean_nan - 4 * std_nan
    # max_val = mean(random, na.rm = T) + 4*sd(random, na.rm = T)
    max_val = mean_nan + 4 * std_nan
    position = Series(resid.tolist(), index=np.arange(resid.shape[0]))
    anomaly = position[(position > max_val) | (position < min_val)]
    # anomalyL = position[(position < min_val)]
    # anomaly = anomalyH.append(anomalyL).drop_duplicates()
    point_anomaly_idx = anomaly.index
    self.anomaly_idx = point_anomaly_idx
    points_anomaly = self.data[point_anomaly_idx]
    self.anomalies = points_anomaly
    return points_anomaly

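# A minimal modern-API sketch of the same residual-threshold idea, assuming
# pandas >= 0.18 (Series.rolling) and statsmodels >= 0.11 (the `period`
# keyword replacing `freq`). Names below are illustrative, not the original's.
import numpy as np
import pandas as pd
import statsmodels.api as sm

def residual_anomalies(values, period, model='additive', n_sigma=4):
    """Flag points whose deseasonalised residual lies beyond n_sigma stds."""
    series = pd.Series(values)
    res = sm.tsa.seasonal_decompose(series, period=period, model=model)
    # A median trend is more robust to the very outliers we want to find.
    trend = series.rolling(period, center=True, min_periods=1).median()
    resid = res.observed - res.seasonal - trend
    mean, std = np.nanmean(resid), np.nanstd(resid)
    mask = (resid > mean + n_sigma * std) | (resid < mean - n_sigma * std)
    return series[mask]
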
def buildTS(path):
    df = pd.read_csv(path)
    df = df[['gmDate', 'playDispNm', 'teamAbbr', 'teamDayOff',
             'playPos', 'playStat', 'playMin']]
    datfrm = pd.DataFrame()  # was a duplicated `datfrm = datfrm = ...` assignment
    for i in df.playDispNm.unique():
        pdf = df[df.playDispNm == i].sort_values(by='gmDate')
        pdf['prevgm'] = pdf.playMin.shift(1)
        pdf['pavg3'] = pd.rolling_mean(pdf.playMin, 3)
        pdf['pavg5'] = pd.rolling_mean(pdf.playMin, 5)
        pdf['pavg10'] = pd.rolling_mean(pdf.playMin, 10)
        # pdf['pavg20'] = pd.rolling_mean(pdf.playMin, 20)
        pdf['pmed3'] = pd.rolling_median(pdf.playMin, 3)
        pdf['pmed5'] = pd.rolling_median(pdf.playMin, 5)
        pdf['pmed10'] = pd.rolling_median(pdf.playMin, 10)
        # pdf['pmed20'] = pd.rolling_median(pdf.playMin, 20)
        pdf['pstd3'] = pd.rolling_std(pdf.playMin, 3)
        pdf['pstd5'] = pd.rolling_std(pdf.playMin, 5)
        pdf['pstd10'] = pd.rolling_std(pdf.playMin, 10)
        # pdf['pstd20'] = pd.rolling_std(pdf.playMin, 20)
        # print(pdf.tail)
        datfrm = datfrm.append(pdf.dropna())
    # print(len(datfrm))
    return datfrm

def plot_rolling_functions(series, window_size=128):
    pd.rolling_median(series, window_size).plot(label='median')
    pd.rolling_mean(series, window_size).plot(label='mean')
    pd.rolling_std(series, window_size).plot(label='std')
    pd.rolling_skew(series, window_size).plot(label='skew')
    pd.rolling_kurt(series, window_size).plot(label='kurt')
    pd.rolling_min(series, window_size).plot(label='min')
    pd.rolling_max(series, window_size).plot(label='max')
    plt.title('Various rolling window functions, window size %s' % window_size)
    plt.legend()
    plt.show()

def plot_downsampled_rolling_median(series, window_size=64, original_freq=512, freq=10):
    median = pd.rolling_median(series, window_size)
    step = original_freq // freq  # integer stride, so it is usable in a slice
    downsampled = median[::step]  # reuse the median computed above
    pd.Series(series).plot()
    downsampled.plot()
    plt.title('rolling_median, window_size=%s, downsampled to %sHz'
              % (window_size, freq))
    annotations = [plt.annotate(int(val), (step * index, val))
                   for index, val in enumerate(downsampled)
                   if not np.isnan(val)]
    plt.legend(('original', 'rolling_median'))
    plt.show()

def plot_rolling_subplot(ax, series, labels, colors):
    """Subplot with smoothed values (using moving median)."""
    for s, label, color in zip(series, labels, colors):
        rolling_median = pd.rolling_median(s, window=5)
        ax.plot(s.index, rolling_median, label=label, color=color)
    ymin, ymax = ax.get_ylim()
    plt.ylim(ymin=0, ymax=max(1, ymax * 1.05))

def smooth_holidays(df, holidays_df, length=1):
    # TODO: Add support for hours
    mmd_df = pd.rolling_median(df, 7)
    indices = df.loc[holidays_df.index.values].dropna().index.values
    indices = pad_dates(indices, freq='D', length=length)  # was hard-coded to 1
    df = replace_rows(df, mmd_df, indices)
    return df

def get_meet_flt(df_meet, window=8):
    """Data processing with median filter."""
    df_flt = pd.rolling_median(df_meet, window=window)
    return df_flt

def rolling_median(x, width):
    """Rolling median with mirrored edges."""
    x, wing = check_inputs(x, width)
    # Pad the edges of the original array with mirror copies
    signal = np.concatenate((x[wing-1::-1], x, x[:-wing-1:-1]))
    rolled = pd.rolling_median(signal, 2 * wing + 1, center=True)
    return rolled[wing:-wing]

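# A minimal sketch of the same mirrored-edge median on pandas >= 0.18, where
# the function-style pd.rolling_median() was replaced by the Series.rolling()
# method API; check_inputs() is assumed from the snippet above.
def rolling_median_modern(x, width):
    """Rolling median with mirrored edges (method-style rolling API)."""
    x, wing = check_inputs(x, width)
    signal = np.concatenate((x[wing-1::-1], x, x[:-wing-1:-1]))
    rolled = pd.Series(signal).rolling(2 * wing + 1, center=True).median()
    return rolled.values[wing:-wing]
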
def smooth_holidays(df, holidays_df, length=1):
    """
    Smooths out peaks and troughs in df, using indices in holidays_df.

    Parameters
    ----------
    df : pd.DataFrame
        Data frame to smooth.
    holidays_df : pd.DataFrame
        Defines the indices to use when smoothing. The function does not
        care about the values in holidays_df, only whether they are present
        for a certain row index. This index is then used to smooth df.
    length : int, default 1
        This argument is passed to pad_dates(). It defines the 'padding'
        around the holiday. Assume May 1 is a holiday: if length = 1, then
        April 30, May 1 and May 2 will be replaced with a 'smoothed' value.

    Returns
    -------
    smoothed_df : pd.DataFrame
        Data frame with holidays, and the days leading up to and after them
        (according to length), smoothed.
    """
    # TODO: Add support for hours
    mmd_df = pd.rolling_median(df, 7)
    indices = df.loc[holidays_df.index.values].dropna().index.values
    indices = pad_dates(indices, freq='D', length=length)
    df = replace_rows(df, mmd_df, indices)
    return df

def movingmedian(interval, window_size):
    if pandas:  # use pandas implementation if available
        tmp = numpy.copy(interval)
        if pandas.__version__ >= '0.18.1':
            tmp[window_size:len(interval) - window_size] = numpy.array(
                pandas.Series(tmp).rolling(
                    2 * window_size).median()[2 * window_size - 1:-1])
        else:
            tmp[window_size:len(interval) - window_size] = pandas.rolling_median(
                tmp, 2 * window_size)[2 * window_size - 1:-1]
    else:
        interval = list(interval)
        tmp = numpy.copy(interval)
        A = None
        As = None
        prev = None
        for i in range(window_size, len(interval) - window_size):
            if A is None:
                A = interval[i - window_size:i + window_size]
                ix = numpy.argsort(A)
                As = list(numpy.array(A)[ix])
            else:
                newdata = interval[i + window_size - 1]
                A = A + [newdata]
                bisect.insort(As, newdata)
            if len(As) % 2:
                tmp[i] = As[len(As) // 2]
            else:
                tmp[i] = (As[len(As) // 2 - 1] + As[len(As) // 2]) / 2.
            prev = A.pop(0)
            del As[bisect.bisect_left(As, prev)]
    return tmp

def get_order(cls, data, segments=2, window=7, writer=None, charts=False, verbose=False):
    '''generate orders from segtrends'''
    price = data[cls.field]
    x_maxima, maxima, x_minima, minima = segtrends(price, segments, window,
                                                   charts=charts)
    if writer or cls.predict:
        features = cls.get_order_features_from_trend(
            segments, x_maxima, maxima, x_minima, minima)
        vol_pct_change = data['Volume'][-(window + 1):].pct_change()[-window:]
        last = data[cls.field][-1]
        roll_mean_var = (pd.rolling_mean(data[cls.field][-window:],
                                         window)[-1] - last) / last
        roll_median_var = (pd.rolling_median(data[cls.field][-window:],
                                             window)[-1] - last) / last
        for add in (vol_pct_change, roll_mean_var, roll_median_var):
            features = np.append(features, add)
    if writer:
        writer.writerow(features)
    if cls.predict:
        order = -1 if cls.predict([features]) == 0 else 1
        return order
    else:
        return cls.get_order_from_trend(minima, maxima, verbose)

def LowPassFilter(values):
    threshold = values.mean() + 3 * values.std()  # computed but unused here
    ResE = rolling_median(
        values, window=15,
        center=True).fillna(method='bfill').fillna(method='ffill')
    # ResE = NormValues(ResEtemp)
    return ResE

def get_scatter_data_for_code_vol(system, instrument_code, rule_name,
                                  return_period=5, days=64):
    denom_price = system.rawdata.daily_denominator_price(instrument_code)
    x = system.rawdata.daily_returns(instrument_code)
    vol = robust_vol_calc(x, days)
    perc_vol = 100.0 * divide_df_single_column(vol, denom_price.shift(1))
    volavg = pd.rolling_median(perc_vol, 1250, min_periods=10)
    vol_qq = (perc_vol - volavg) / volavg
    # work out return for the N days after the forecast
    norm_data = system.accounts.pandl_for_instrument_forecast(
        instrument_code, rule_name)
    (vol_qq, norm_data) = align_to_joint(vol_qq, norm_data, ffill=(True, False))
    period_returns = pd.rolling_sum(norm_data, return_period, min_periods=1)
    ex_post_returns = period_returns.shift(-return_period)
    lagged_vol = vol_qq.shift(1)
    return (list(ex_post_returns.iloc[:, 0].values),
            list(lagged_vol.iloc[:, 0].values))

def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))

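# A minimal sketch of the equivalent checks against the method-style rolling
# API (pandas >= 0.18, mirrored by dask.dataframe), assuming the same
# assert_eq helper and p/d fixtures as the test above.
def rolling_methods_tests(p, d):
    assert_eq(p.rolling(3).median(), d.rolling(3).median())
    assert_eq(p.rolling(3).mean(), d.rolling(3).mean())
    assert_eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    assert_eq(p.rolling(3, min_periods=3).sum(),
              d.rolling(3, min_periods=3).sum())
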
def is_spike(series, window_size=3, threshold=3, scale=True):
    """
    Flags spikes in an array-like object using a median filter of
    `window_size` and a `threshold` for the median difference. If
    `scale=False` the differences are not scaled by the data standard
    deviation and the masking is "aggressive."

    Examples
    --------
    >>> from pandas import Series, date_range
    >>> series = [33.43, 33.45, 34.45, 90.0, 35.67, 34.9, 43.5, 34.6, 33.7]
    >>> series = Series(series, index=date_range('1980-01-19',
    ...                 periods=len(series)))
    >>> series[is_spike(series, window_size=3, threshold=3, scale=False)]
    1980-01-22    90.0
    1980-01-25    43.5
    dtype: float64
    >>> series[is_spike(series, window_size=3, threshold=3, scale=True)]
    1980-01-22    90.0
    Freq: D, dtype: float64

    """
    # bfill+ffill needs a series and won't affect the median.
    series = Series(series)
    medians = rolling_median(series, window=window_size, center=True)
    medians = medians.fillna(method='bfill').fillna(method='ffill')
    difference = np.abs(series - medians).values
    if scale:
        return difference > (threshold * difference.std())
    return difference > threshold

def GapFlat(time, flux, order=3, maxgap=0.125):
    '''
    Flatten each continuous window of data by removing a low-order
    polynomial fit to its rolling median.

    Parameters
    ----------

    Returns
    -------
    Data with polynomials removed
    '''
    _, dl, dr = FindGaps(time, maxgap=maxgap)  # finds right edge of time windows
    tot_med = np.nanmedian(flux)  # the total from all quarters
    flux_flat = np.array(flux, copy=True)
    for i in range(0, len(dl)):
        # smoothing kernel: ~1% of the window length, at least 10 points
        krnl = int(float(dr[i] - dl[i]) / 100.0)  # was dl[i] - dr[i], always negative
        if krnl < 10:
            krnl = 10
        flux_sm = rolling_median(flux[dl[i]:dr[i]], krnl)
        indx = np.isfinite(flux_sm)
        fit = np.polyfit(time[dl[i]:dr[i]][indx], flux_sm[indx], order)
        flux_flat[dl[i]:dr[i]] = (flux[dl[i]:dr[i]]
                                  - np.polyval(fit, time[dl[i]:dr[i]])
                                  + tot_med)
    return flux_flat

def calc_vol_profiles(full_df):
    full_df['dpvolume_med_21'] = np.nan
    full_df['dpvolume_std_21'] = np.nan
    full_df['dpvolume'] = full_df['dvolume'] * full_df['dvwap']
    print("Calculating trailing volume profile...")
    for timeslice in ['09:45', '10:00', '10:15', '10:30', '10:45', '11:00',
                      '11:15', '11:30', '11:45', '12:00', '12:15', '12:30',
                      '12:45', '13:00', '13:15', '13:30', '13:45', '14:00',
                      '14:15', '14:30', '14:45', '15:00', '15:15', '15:30',
                      '15:45', '16:00']:
        timeslice_df = full_df[['dpvolume', 'tradable_med_volume_21', 'close']]
        timeslice_df = timeslice_df.unstack().between_time(timeslice,
                                                           timeslice).stack()
        timeslice_df = timeslice_df.dropna()
        if len(timeslice_df) == 0:
            continue
        timeslice_df['dpvolume_med_21'] = timeslice_df['dpvolume'].groupby(
            level='sid').apply(lambda x: pd.rolling_median(x.shift(1), 21))
        timeslice_df['dpvolume_std_21'] = timeslice_df['dpvolume'].groupby(
            level='sid').apply(lambda x: pd.rolling_std(x.shift(1), 21))
        m_df = timeslice_df.dropna()
        print(m_df.head())
        print("Average dvol frac at {}: {}".format(
            timeslice,
            (m_df['dpvolume_med_21'] /
             (m_df['tradable_med_volume_21'] * m_df['close'])).mean()))
        full_df.ix[timeslice_df.index, 'dpvolume_med_21'] = \
            timeslice_df['dpvolume_med_21']
        full_df.ix[timeslice_df.index, 'dpvolume_std_21'] = \
            timeslice_df['dpvolume_std_21']
    return full_df

def despike(data, window=20, n_deviation=0.35):
    df = pd.DataFrame(data)
    data_median_pd = df.copy()
    data_diff_pd = df.copy()
    outlier_idx = df.copy()
    data_despiked = df.copy()
    row, column = data.shape
    for n in range(1, column):
        data_median_pd[n] = rolling_median(
            df[n], window=window,
            center=True).fillna(method='bfill').fillna(method='ffill')
        # deviation from the rolling median
        data_diff_pd[n] = np.abs(df[n] - data_median_pd[n])
        # spike: if the deviation is much more than the global standard deviation
        outlier_idx[n] = data_diff_pd[n] > n_deviation * df[n].std()
        # replace the spikes with the rolling median values
        data_despiked[n][outlier_idx[n]] = data_median_pd[n][outlier_idx[n]]
        if outlier_idx[n].any():  # plot if any spike is detected
            # the following plots most of the relevant calculations
            # smart_plot([df[0], df[0][outlier_idx[n]], df[0], df[0], df[0], df[0]],
            #            [df[n], df[n][outlier_idx[n]], data_median_pd[n],
            #             data_diff_pd[n], df_mad, data_despiked[n]],
            #            x_label='Energy (eV)', y_label='PL',
            #            label=['data', 'spikes', 'median', 'data-median',
            #                   'deviation', 'despiked'],
            #            lines=['-', 'x', '--', '-', '-', '-'],
            #            ms=[1, 15, 1, 1, 1, 1], annotate=3, figsize=(16, 10))
            smart_plot([df[0], df[0]], [df[n], data_despiked[n]],
                       legend_title='Removing Spikes',
                       label=[data_labels[n], 'After removal'],
                       x_label='Energy (eV)', y_label='PL',
                       lines=['-', '-'], ms=[1, 15], annotate=0)
            plt.show()
    return data_despiked

def create_preds(language):
    TRAIN_FILE = language + "_train.csv"
    TEST_FILE = language + "_test.csv"
    PREDS_FILE = language + "_preds.csv"
    # os.path.join with separate components is portable; the original
    # concatenated Windows backslashes inside a single argument
    TRAIN_PATH = os.path.join(os.getcwd(), "train_test", TRAIN_FILE)
    TEST_PATH = os.path.join(os.getcwd(), "train_test", TEST_FILE)
    PREDS_PATH = os.path.join(os.getcwd(), "preds", PREDS_FILE)
    train = pd.read_csv(TRAIN_PATH)
    test = pd.read_csv(TEST_PATH)
    train_flattened = pd.melt(train[list(train.columns[-61:-1]) + ['Page']],
                              id_vars='Page', var_name='date',
                              value_name='Visits')
    grouped = train_flattened.groupby(['Page'])['Visits']
    rolling_meds = pd.rolling_median(grouped, window=7)
    test['Visits'] = rolling_meds.values
    test.loc[test.Visits.isnull(), 'Visits'] = 0.0
    test[['Id', 'Visits']].to_csv(PREDS_PATH, index=False)
    del train, test, train_flattened, grouped, rolling_meds
    gc.collect()

def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    assert_eq(pd.rolling_window(p, 3, win_type='boxcar'),
              dd.rolling_window(d, 3, win_type='boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))

def _remove_outliers(self):
    print("Removing outliers for model fitting")
    temp = []
    for i in [0, 1]:
        median_norm = pd.rolling_median(self.fcArray[i],
                                        window=self.rollingMedianWindow,
                                        center=True)
        difference = np.abs(self.fcArray[i] - median_norm)
        temp.append(self.fcArray[i][difference < self.outlierThreshold])
    return np.asarray(temp)

def cleanVWC(df, outDir):
    # Specify the threshold and window size for the filter. Thresh is in
    # units of % (VWC), and windowsize counts the number of 15 minute
    # intervals over which to assess the threshold.
    thresh = 30
    windowsize = 96
    # We're going to output the results of the outlier detection to a
    # summary figure for review. Set up the plot structure outside the loop.
    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
    plt.subplots_adjust(hspace=0.5)
    kw = dict(marker='o', linestyle='none', color='r', markersize=10)
    idx = 0
    # Iterate over the VWC columns in the metstation DF
    for VWCvar in ['VWCC', 'VWCD']:
        # Create column identifiers for the threshold and filtered values
        filtThresh = VWCvar + '_fT'
        filteredVWC = VWCvar + '_f'
        # Rolling median filter with specified window size
        df[filtThresh] = pd.rolling_median(
            df[VWCvar], window=windowsize,
            center=True).fillna(method='bfill').fillna(method='ffill')
        # Test filtered values against specified threshold
        difftest = np.abs(df[VWCvar] - df[filtThresh])
        # Boolean for values that do not pass the test
        outlier_pos = difftest > thresh
        # Replace filtered values with NaN, so long as there are identified outliers
        df[filteredVWC] = df[VWCvar]
        if outlier_pos[outlier_pos == True].size > 0:
            df[filteredVWC][outlier_pos] = np.nan
        # populate the plot
        df[VWCvar].plot(ax=f.axes[idx], color='gray')
        if outlier_pos[outlier_pos == True].size > 0:
            df[VWCvar][outlier_pos].plot(ax=f.axes[idx], **kw)
        f.axes[idx].set_ylim([0, 105])
        df[filteredVWC].plot(ax=f.axes[idx + 2], color='gray')
        f.axes[idx].set_title(VWCvar)
        idx += 1
    for ax in f.axes:
        ax.set_xlabel('')
    ax1.set_ylabel('Soil VWC (%)')
    ax3.set_ylabel('Soil VWC \nfiltered (%)')
    plt.tight_layout()
    sns.despine()
    plotStationName = df['Locale'][0] + '_' + str(df['LoggerID'][0]) + '_'
    plt.savefig(outDir + 'QAQC/' + plotStationName + 'VWC_Filt.tif')
    plt.close()
    return df

def rollingStats(self, selectCol=[], splitCol=None, sepCol=None,
                 startTime=None, endTime=None, window=60, quantile=0.1,
                 freq='10s', min_periods=5):
    df = self.dfSetup()

    # Selects a list of columns to use and splits a column into a single
    # type if it contains more than one, e.g. if a file contains multiple
    # sensor readings
    if len(selectCol) > 0:
        dfSub = df[selectCol]
    else:
        dfSub = df
    if splitCol and sepCol:
        dfSub = dfSub[dfSub[splitCol] == sepCol]

    # Converts datetime column to datetime object index, then uses it to
    # create time slices. Time format '2015-10-17 09:00:00'. May use dfOther
    # to use other data frames
    if startTime and endTime:
        dfSub = dfSub[startTime:endTime]

    if splitCol:
        dfSub = dfSub.drop(splitCol, axis=1)  # remove columns used to split entries

    valueName = dfSub.columns.values[0]
    outList = []
    counts = pd.rolling_count(dfSub, window, freq=freq).rename(
        columns={valueName: 'rolling_counts'})
    outList.append(counts)
    means = pd.rolling_mean(dfSub, window, min_periods=min_periods,
                            freq=freq).rename(columns={valueName: 'rolling_mean'})
    outList.append(means)
    rms = np.sqrt(pd.rolling_mean(dfSub**2, window, min_periods=min_periods,
                                  freq=freq).rename(columns={valueName: 'rolling_rms'}))
    outList.append(rms)
    medians = pd.rolling_median(dfSub, window, min_periods=min_periods,
                                freq=freq).rename(columns={valueName: 'rolling_median'})
    outList.append(medians)
    stds = pd.rolling_std(dfSub, window, min_periods=min_periods,
                          freq=freq).rename(columns={valueName: 'rolling_std'})
    outList.append(stds)
    mins = pd.rolling_min(dfSub, window, min_periods=min_periods,
                          freq=freq).rename(columns={valueName: 'rolling_min'})
    outList.append(mins)
    maxs = pd.rolling_max(dfSub, window, min_periods=min_periods,
                          freq=freq).rename(columns={valueName: 'rolling_max'})
    outList.append(maxs)
    quants = pd.rolling_quantile(dfSub, window, quantile,
                                 min_periods=min_periods, freq=freq).rename(
        columns={valueName: 'rolling_quantile'})
    outList.append(quants)
    dfOut = pd.concat(outList, axis=1)
    return dfOut

def plot_trend(self, statistic):
    '''Plots trends of specified statistic over time in a specified facility'''
    self.df[statistic].plot()
    stat_smooth = pd.rolling_median(self.df[statistic], 10)
    # plt.plot(stat_smooth, label='rolling({k})'.format(k=statistic))
    stat_smooth.plot(label='rolling({k})'.format(k=statistic))
    plt.title(statistic + " in " + self.name)
    plt.legend(loc="best")
    plt.show()

def thermal_correction(self, context):
    if not self.num_of_freqs_to_thermal_adjust or \
            self.num_of_freqs_to_thermal_adjust > len(self.big_frequencies):
        return 0
    freqs = self.big_frequencies[-self.num_of_freqs_to_thermal_adjust:]
    spec = context.result.spec
    if spec.frequency not in freqs:
        return 0
    data_path = os.path.join(context.output_directory, 'daq',
                             '{}.csv'.format(self.big_core))
    data = pd.read_csv(data_path)['power']
    return _adjust_for_thermal(
        data, filt_method=lambda x: pd.rolling_median(x, 1000),
        thresh=0.9, window=5000)

def compare_window_sizes(series, sizeList):
    plots = [pd.rolling_median(series, size).plot() for size in sizeList]
    plt.title('Comparison of rolling_median() with different window sizes')
    plt.legend(sizeList)
    plt.show()
    plots = [pd.rolling_mean(series, size).plot() for size in sizeList]
    plt.title('Comparison of rolling_mean() with different window sizes')
    plt.legend(sizeList)
    plt.show()

def rolling_median(x, width):
    """Rolling median with mirrored edges."""
    x, wing = check_inputs(x, width)
    # Pad the edges of the original array with mirror copies
    signal = np.concatenate((x[wing-1::-1], x, x[:-wing-1:-1]))
    with warnings.catch_warnings():
        # NB: in pandas 0.18+ this function is deprecated
        warnings.simplefilter("ignore", FutureWarning)
        rolled = pd.rolling_median(signal, 2 * wing + 1, center=True)
    return rolled[wing:-wing]

def stepfilt(x, delta=3, window=7):
    assert window % 2 != 0, 'window size must be odd'
    n = window // 2  # integer half-window (window / 2 is a float on Python 3)
    v = np.r_[np.repeat(x[0], n), x, np.repeat(x[-1], n)]  # expanded arr
    m = pd.rolling_median(v, window, center=True)  # filtered arr
    for i in range(len(m) - 1):
        diff = m[i + 1] - m[i]
        if np.abs(diff) > delta:
            v[i + 1:] -= diff
    return v[n:-n], m[n:-n]

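# Illustrative usage sketch for stepfilt(): a synthetic record with a level
# shift at index 50 (values made up for the example; relies on the legacy
# pd.rolling_median API used above).
x = np.r_[np.zeros(50), 10 * np.ones(50)] + np.random.normal(0, 0.1, 100)
levelled, filtered = stepfilt(x, delta=3, window=7)
# after levelling, both halves of the record share one baseline
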
def median_detrend(self, window=75):
    # rolling median to normalise the flux and flux_err read from archive
    f = self._flux.copy()
    f[self.any_intransit] = np.nan
    f_median = pd.rolling_median(f, window, center=True,
                                 min_periods=1)  # was hard-coded to 75
    self._detrended_flux = self._flux / f_median
    self._detrended_flux_err = self._flux_err / f_median
    nonan = [i for i in self._detrended_flux_err if not np.isnan(i)]
    self._detrended_flux_err_max = np.max(nonan)

def averaged_median_arr(e):
    rolling_median_window = 3
    try:
        if len(e) >= rolling_median_window:
            e = pandas.rolling_median(e, window=rolling_median_window,
                                      min_periods=1)
        agg = numpy.mean(e)
    except ValueError:
        agg = None
    return agg

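# A sketch of the same aggregation on pandas >= 0.18, where rolling_median()
# moved to the Series.rolling() method API; empty-input behaviour is kept by
# returning None explicitly rather than relying on a ValueError.
def averaged_median_arr_modern(e):
    rolling_median_window = 3
    if len(e) == 0:
        return None
    if len(e) >= rolling_median_window:
        e = pandas.Series(e).rolling(window=rolling_median_window,
                                     min_periods=1).median()
    return numpy.mean(e)
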
def get_outliers_multiple_filter(data_frame, component, filter_list):
    import pandas as pd
    from scipy import stats
    import math

    print('Started : get_outliers_multiple_filter')
    if filter_list.z_score is not None:
        data_frame[component + '_z'] = np.abs(stats.zscore(data_frame[component]))
        print('Z-score calculation done.')
    if filter_list.normal_disb is not None:
        mean = data_frame[component].mean()
        std = data_frame[component].std()
        print('Mean and std calculation done.')
    if filter_list.quantile is not None:
        q1 = data_frame[component].quantile(0.25)
        q3 = data_frame[component].quantile(0.75)
        iqr = q3 - q1  # interquartile range
        fence_low = q1 - filter_list.quantile.interquartile_range_scale * iqr
        fence_high = q3 + filter_list.quantile.interquartile_range_scale * iqr
        print('quantile calculation done.')
    if filter_list.rolling_medians is not None:
        from pandas import rolling_median
        data_frame['r_median'] = rolling_median(
            data_frame[component], window=3,
            center=True).fillna(method='bfill').fillna(method='ffill')
        data_frame['r_median_diff'] = np.abs(data_frame[component] -
                                             data_frame['r_median'])
        print('Rolling calculation done.')

    index_list_to_drop = []
    for index, row in data_frame.iterrows():
        if filter_list.ab_ignore_min_max is not None:
            if (row[component] < filter_list.ab_ignore_min_max.min or
                    row[component] > filter_list.ab_ignore_min_max.max):
                index_list_to_drop.append(index)
        if filter_list.unreal_total_field is not None:
            if (math.isnan(row['F']) or
                    row['F'] < filter_list.unreal_total_field.total_field_min or
                    row['F'] > filter_list.unreal_total_field.total_field_max):
                index_list_to_drop.append(index)
        if filter_list.ab_ignore_sudden_inc is not None:
            int_index = data_frame.index.get_loc(index)
            if (int_index > 1 and
                    abs(row[component] - data_frame.iloc[int_index - 1][component]) >
                    filter_list.ab_ignore_sudden_inc.threshold):
                index_list_to_drop.append(index)
        if filter_list.z_score is not None:
            if row[component + '_z'] > filter_list.z_score.threshold:
                index_list_to_drop.append(index)
        if filter_list.normal_disb is not None:
            if ((row[component] < (mean - filter_list.normal_disb.SD_range_scalar * std)) or
                    (row[component] > (mean + filter_list.normal_disb.SD_range_scalar * std))):
                index_list_to_drop.append(index)
        if filter_list.quantile is not None:
            if (row[component] < fence_low) or (row[component] > fence_high):
                index_list_to_drop.append(index)
        if filter_list.rolling_medians is not None:
            if row['r_median_diff'] > filter_list.rolling_medians.threshold:
                index_list_to_drop.append(index)

    result = data_frame.loc[index_list_to_drop]
    print('Done : get_outliers_multiple_filter')
    return result

def RemoveOutlier(values):
    threshold = values.mean() + 3 * values.std()
    ResEtemp = rolling_median(
        values, window=15,
        center=True).fillna(method='bfill').fillna(method='ffill')
    difference = np.abs(values - ResEtemp)
    outlier_idx = difference > threshold
    # outlier_idx = values > threshold
    values[outlier_idx] = threshold
    # ResE = NormValues(ResEtemp)
    return values

def phase_areas(lv_seg):
    '''determines approximate end-systole and end-diastole frame indices'''
    lv_segs = remove_periphery(lv_seg)
    lv_areas = extract_areas(lv_segs)
    lv_areas = rolling_median(
        pd.DataFrame(lv_areas)[0], window=3,
        center=True).fillna(method='bfill').fillna(method='ffill').tolist()
    x, y = smooth_fft(lv_areas, 2500)
    frame10 = np.argsort(y)[np.int(0.10 * len(y))]
    frame90 = np.argsort(y)[np.int(0.90 * len(y))]
    return frame10, frame90

def ProfilePlotVerificador(Data, H, surface, Data2=None, H2=None, scale='log',
                           labelData='Variable', fecha=dt.datetime(1992, 1, 15),
                           name='Prueba.png'):
    """
    Plot a CALIPSO single profile of data

    INPUTS:
    Data  : 1D array of data to plot
    H     : Height of data [km]
    Data2 : 1D array of data to plot from the ground-based LIDAR
    H2    : Height of data from the ground-based LIDAR [km]
    scale : Type of scale ["linear", "log", "symlog", "logit"]
    labelData : Name to show in the data plot
    fecha : datetime of the data
    name  : name of the output file

    OUTPUTS
    File saved in the Path_fig folder
    """
    plt.cla()
    plt.clf()
    plt.close('all')
    fig = plt.figure(figsize=(10, 16))
    ax1 = fig.add_axes([0, 0, 1, 1])
    idx = np.where(H >= surface)[0]
    ax1.plot(Data[idx], H[idx] - H[idx][0], color=AzulChimba, alpha=0.7)
    # ax.plot(Data, H, color=AzulChimba, alpha=0.7)
    # moving median
    a = pd.DataFrame(Data[idx], index=H[idx] - H[idx][0])
    # a = pd.DataFrame(Data, index=H)
    c = pd.rolling_median(a, window=10, min_periods=1, center=True)
    ax1.plot(c.values.ravel(), c.index.values, linewidth=2, color=Azulillo)
    ax1.set_xscale(scale)
    ax1.set_ylabel('Altitude [km]')
    ax1.set_xlabel(labelData)
    if Data2 is not None:
        ax1.plot(Data2[idx], H2[idx] - H2[idx][0], color=Naranja, alpha=0.7)
        a = pd.DataFrame(Data2[idx], index=H2[idx] - H2[idx][0])
        c = pd.rolling_median(a, window=10, min_periods=1, center=True)
        ax1.plot(c.values.ravel(), c.index.values, linewidth=2, color=Naranja)
    plt.savefig(Path_fig + name, transparent=True, bbox_inches='tight')

def estimate_diff(self, s1, s2):
    l1 = float(len(s1))
    l2 = float(len(s2))
    # Don't estimate time series that have huge difference in number of
    # elements
    if max(l1, l2) / min(l1, l2) > self.MAX_SIZE_DIFF:
        return -1
    # Don't estimate very small time series
    if min(l1, l2) < self.MIN_DATA_POINTS:
        return -1
    # Heuristic coefficient
    confidence = 0.0
    # Compare correlation coefficient
    if s1.corr(s2) < 0.9:
        confidence += 1.0
    # Compare mean values
    diff = abs(s1.mean() - s2.mean())
    if diff > self.ALLOWED_NOISE * s1.mean():
        confidence += 1.0
    # Compare 4 different percentiles
    for q in (0.5, 0.75, 0.9, 0.95):
        diff = abs(s1.quantile(q) - s2.quantile(q))
        if diff > self.ALLOWED_NOISE * s1.quantile(q):
            confidence += 0.5
    # Compare maximum values
    diff = abs(s1.max() - s2.max())
    if diff > self.ALLOWED_NOISE * s1.max():
        confidence += 1.0
    # Primary trend comparison
    t1 = pd.rolling_median(s1, window=5)
    t2 = pd.rolling_median(s2, window=5)  # was s1, which made both trends identical
    if abs((t1 - t2).mean()) > self.ALLOWED_NOISE * t1.mean():
        confidence += 2.0
    # Return confidence as rounded percentage value
    return round(100 * confidence / self.MAX_CONFIDENCE)

def strat(M, g, j, X_code, Y_code, X_close, X_volume, Y_close, Y_volume):
    """
    This function creates a dataframe with results of a spread trading
    strategy (see HW2 of FINM 33150 - Quantitative Strategies and
    Regression).

    Inputs:
    M ~ return difference calculation time frame. M cannot exceed the number
        of trading days between 2013-12-02 and 2014-01-01
    g ~ entering threshold
    j ~ exiting threshold
    s ~ stop loss threshold
    X_code ~ Quandl code for X
    Y_code ~ Quandl code for Y
    X_close ~ X column name for close
    X_volume ~ X column name for volume
    Y_close ~ Y column name for close
    Y_volume ~ Y column name for volume

    Example of calling function:
    strat(10, 0.01, 0.008, 0.10, 'GOOG/NYSE_XSD', 'YAHOO/SMH',
          'GOOG.NYSE_XSD - Close', 'GOOG.NYSE_XSD - Volume',
          'YAHOO.SMH - Close', 'YAHOO.SMH - Volume')
    """
    # grab data using Quandl
    ETF_data = Quandl.get(list((X_code, Y_code)), authtoken=auth,
                          trim_start=start_date, trim_end=end_date,
                          returns="pandas")
    df = pd.DataFrame(ETF_data.ix[:, (X_close, X_volume, Y_close, Y_volume)])  # subset
    df.columns = ['XP', 'XV', 'YP', 'YV']
    df['XDDV'] = df.XP * df.XV  # calculate daily dollar volumes
    df['Nt'] = pd.rolling_median(df.XDDV, 15).shift(1)  # 15 day rolling median
    K = np.max(2 * df.Nt)  # capital - set K now that we have Nt
    df['XR'] = np.log(df.XP) - np.log(df.XP.shift(1))  # log returns
    df['YR'] = np.log(df.YP) - np.log(df.YP.shift(1))
    df['Delta'] = df.XR - df.YR  # difference of X and Y
    df['DeltaM'] = pd.rolling_sum(df.Delta, M).shift(1)  # M day historical accumulated difference
    df = df[df.index >= trade_begin]  # drop unnecessary date range
    df['Signal'] = np.nan  # add empty trade signal column
    df.Signal[df.DeltaM > g] = 1  # entering or maintaining trade
    df.Signal[df.DeltaM < -g] = -1  # entering or maintaining trade
    df.Signal[np.abs(df.DeltaM) < j] = 0  # exiting or out of trade
    df['EOM'] = np.nan  # end of month
    # day before 1st day
    df.EOM[(df.shift(1, freq='B').index.day <= 3) &
           (df.shift(1, freq='B').index.day - df.index.day < -1)] = 1
    df.Signal[(df.shift(1, freq='B').index.day <= 3) &
              (df.shift(1, freq='B').index.day - df.index.day < -1)] = 0
    df.Signal[((df.Signal == -1) & (df.DeltaM > j)) |
              ((df.Signal == 1) & (df.DeltaM < j))] = 0
    for i in range(1, len(df)):
        if np.isnan(df.Signal[i]):  # if between g and j
            df.Signal[i] = df.Signal[i - 1]  # fill in with current position
    # entry point
    df['Entry'] = 1 * (((df.Signal == 1) | (df.Signal == -1)) &
                       ((df.shift(1).Signal == 0) |
                        (np.isnan(df.shift(1).Signal) == True)))
    # jumping g to -g or vice versa
    df.Entry[((df.Signal == -1) & (df.shift(1).Signal == 1)) |
             ((df.Signal == 1) & (df.shift(1).Signal == -1))] = 1
    # exit point
    df['Exit'] = 1 * ((df.Signal == 0) & ((df.shift(1).Signal == 1) |
                                          (df.shift(1).Signal == -1)))
    df['Nx'] = np.round(-df.Signal * df.Nt / 100 / df.XP, 0)  # size of X trade
    df['Ny'] = np.round(df.Signal * df.Nt / 100 / df.YP, 0)  # size of Y trade
    # dollar profit (loss)
    df['Profit'] = pd.DataFrame((df.Nx.shift(1) * df.XP.shift(1) * df.XR) +
                                df.Ny.shift(1) * df.YP.shift(1) * df.YR)
    df['Cum_Profit'] = np.cumsum(df.Profit)  # cumulative profit
    df['K'] = np.round(K + df.Cum_Profit, 0)  # capital based on changes in profit
    df['Return'] = 252 * df.Profit / df.K.shift(1)  # annualised returns
    return df

def get_time_series(self, observables):
    for ol in observables:
        for observable in ol:
            if observable:
                raw_data = self.seriesly.query_data(observable)
                if raw_data:
                    s = pd.Series(raw_data)
                    if len(s.unique()) == 1:
                        continue
                    s = pd.rolling_median(s, window=3)
                    title = Plotter.generate_title(observable)
                    yield title, s

def rolling_median(self, data_frame, periods):
    """
    rolling_median - Calculates the rolling median

    Parameters
    ----------
    data_frame : DataFrame
        contains time series
    periods : int
        number of periods in the median

    Returns
    -------
    DataFrame
    """
    return pandas.rolling_median(data_frame, periods)

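# A minimal sketch of the same wrapper against the method-style API that
# replaced pandas.rolling_median in pandas 0.18+ (not the original code):
def rolling_median_modern(self, data_frame, periods):
    """Rolling median via DataFrame.rolling()."""
    return data_frame.rolling(window=periods).median()
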
def rolling_median(x, width):
    """Rolling median with mirrored edges.

    Contributed by Peter Otten to comp.lang.python. This is (somehow) faster
    than pandas' Cythonized skip-list implementation for arrays smaller than
    ~100,000 elements.

    Source:
    https://bitbucket.org/janto/snippets/src/tip/running_median.py
    https://groups.google.com/d/msg/comp.lang.python/0OARyHF0wtA/SEs-glW4t6gJ
    """
    x, wing = check_inputs(x, width)
    # Pad the edges of the original array with mirror copies
    signal = np.concatenate((x[wing-1::-1], x, x[:-wing-1:-1]))
    rolled = pd.rolling_median(signal, 2 * wing + 1, center=True)
    return rolled[wing:-wing]

def clean_data(df, var, window=3, threshold=0.5):
    from pandas import rolling_median
    from numpy import abs
    original_columns = df.columns
    # renamed from `rolling_median` to avoid shadowing the import
    rolled = rolling_median(df[var], window=window, center=True)
    df['this_mean'] = (df[var].mean() + rolled) / 2.
    df = df[abs(df[var] - df.this_mean) <= threshold * df[var].std()]
    return df[original_columns]

def interpolate_outliers(angle, data, threshold=0.5, window=12, plot_me=False):
    """
    Function to smooth outliers from the data set.
    Applies moving average smoothing and cyclic boundary conditions.
    Threshold is set by:
    threshold - number of standard deviations from average which defines outliers
    window - number of points in each direction used for average
    """
    df = pd.DataFrame({"parameter": data}, index=angle)
    # mean_data = np.mean(df['parameter'])
    df["data_mean"] = (
        pd.rolling_median(df["parameter"].copy(), window=window, center=True)
        .fillna(method="bfill")
        .fillna(method="ffill")
    )
    difference = np.abs(df["parameter"] - df["data_mean"])  # mean_data)
    outlier_idx = difference > threshold * df["parameter"].std()
    # df['data_mean'].plot()
    # s = df['parameter'].copy()
    # s[outlier_idx] = np.nan
    # s.interpolate(method='spline', order=1, inplace=True)
    # df['cleaned_parameter'] = s
    tst = np.array(outlier_idx)
    datamean = np.array(df["data_mean"])
    s = np.array(df["parameter"])
    itms = len(outlier_idx)
    for i in range(itms):
        # replace a point if it, or any of its two neighbours on each side
        # (with cyclic wrap-around), is flagged as an outlier
        if (tst[i]
                or tst[(i - 1) % itms]
                or tst[(i + 1) % itms]
                or tst[(i - 2) % itms]
                or tst[(i + 2) % itms]):
            s[i] = datamean[i]
    # print(s)
    df["cleaned_parameter"] = s
    if plot_me:
        figsize = (7, 2.75)
        fig, ax = plt.subplots(figsize=figsize)
        df["parameter"].plot(title="cleaned vs unclean Parameter")
        df["cleaned_parameter"].plot()
        ax.set_ylim(min(df["cleaned_parameter"]), max(df["cleaned_parameter"]))
    return np.array(df["cleaned_parameter"])

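# Illustrative usage sketch for interpolate_outliers(): a periodic profile
# with two injected spikes (angle grid and spike positions are made up; the
# function itself relies on the legacy pd.rolling_median API used above).
angle = np.linspace(0, 2 * np.pi, 100, endpoint=False)
data = np.sin(angle)
data[[10, 50]] += 5.0  # inject two spikes
cleaned = interpolate_outliers(angle, data, threshold=0.5, window=12)
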
def noisyUser(df, by, col='Power', window=9):
    """
    Find the user with maximum signal noise.
    :param df: pd.DataFrame containing several users per one laccid {pd.DataFrame}
    :param col: column to compute {'str'}
    :param window: rolling window length {'int'}
    :return: name of the noisiest user and per-user noise shares {'str', dict}
    """
    maxNoise = 0
    noises = {}
    noisiest = None  # avoid an unbound name if no user exceeds zero noise
    grouped = df.groupby(by)
    for user, gr in grouped:
        user_fltrd = pd.rolling_median(gr[col], window, center=True)
        noisy_part = gr[user_fltrd != gr[col]].shape[0] / float(gr.shape[0])
        if noisy_part > maxNoise:
            noisiest = user
            maxNoise = noisy_part
        noises.update({user: noisy_part})
    return noisiest, noises

def test_ts_median(self):
    self.env.add_operator('ts_median', {
        'operator': OperatorTSMedian,
        'arg1': {'value': [3, 5]},
    })
    string1 = 'ts_median(2, open1)'
    gene1 = self.env.parse_string(string1)
    self.assertFalse(gene1.validate())
    string2 = 'ts_median(3, open1)'
    gene2 = self.env.parse_string(string2)
    self.assertTrue(gene2.validate())
    self.assertEqual(gene2.dimension, 'CNY')
    self.assertRaises(IndexError, gene2.eval, self.env, self.date1, self.date2)
    date1 = self.env.shift_date(self.date1, 2)
    df = pd.rolling_median(self.env.get_data_value('open1'), 3).iloc[2:]
    self.assertTrue(
        frame_equal(
            gene2.eval(self.env, date1, self.date2),
            df))

def step_filt(x, delta=3, window=7):
    """Filter step-changes in a vector.

    Detects level-shifts in a time series and corrects them by levelling
    both sides of the record. Discriminates steps from peaks using a
    moving-median approach.
    """
    assert window % 2 != 0, 'window size must be odd'
    n = window // 2  # integer half-window, also used as a slice index
    v = np.r_[np.repeat(x[0], n), x, np.repeat(x[-1], n)]  # expanded arr
    m = pd.rolling_median(v, window, center=True)  # filtered arr
    for i in range(len(m) - 1):
        diff = m[i + 1] - m[i]
        if np.abs(diff) > delta:
            # plt.plot(v)
            v[i + 1:] -= diff
            # plt.plot(v)
            # plt.show()
    return v[n:-n], m[n:-n]

def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))

def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, "boxcar"), dd.rolling_window(d, 3, "boxcar"))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))

def rolling_fn(x, w, fn):
    # print("Applying rolling fn %s with window size %d" % (fn, w))
    builtin = {
        np.mean: pandas.rolling_mean,
        np.median: pandas.rolling_median,
        np.min: pandas.rolling_min,
        np.max: pandas.rolling_max,
        np.var: rolling_var,  # not sure why I get NaN from pandas functions
        np.std: rolling_std,
        crossing_rate: rolling_crossing_rate,
    }.get(fn, None)
    if builtin:
        aggregated = builtin(x, w)
    elif fn == mad:
        # median absolute deviation: mean of |x - rolling median|
        medians = pandas.rolling_median(x, w)
        abs_diffs = np.abs(x - medians)
        aggregated = pandas.rolling_mean(abs_diffs, w)
    else:
        aggregated = pandas.rolling_apply(x, w, fn)
    n_bad = np.sum(~np.isfinite(aggregated[w:]))
    if n_bad > 0:
        print("[rolling_fn] Number bad entries:", n_bad)
    return aggregated

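# For reference: the function-style rolling API used throughout these
# snippets was deprecated in pandas 0.18 and removed in 0.23. A sketch of
# the method-style equivalents (s is a Series or DataFrame, w the window):
#
#   pd.rolling_mean(s, w)             -> s.rolling(w).mean()
#   pd.rolling_median(s, w)           -> s.rolling(w).median()
#   pd.rolling_std(s, w)              -> s.rolling(w).std()
#   pd.rolling_quantile(s, w, q)      -> s.rolling(w).quantile(q)
#   pd.rolling_apply(s, w, f)         -> s.rolling(w).apply(f)
#   pd.rolling_window(s, w, 'boxcar') -> s.rolling(w, win_type='boxcar').mean()
#
# Keyword arguments such as center=... and min_periods=... carry over
# unchanged to .rolling().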