def test_rolling_max_how_resample(self):
    """Check the ``how`` argument of ``rolling_max`` when ``freq='D'`` is given.

    The input has one observation per day on Jan 1-4, 1975 and three
    observations (4, 10 and 20) on Jan 5, so only the last daily value
    depends on how the intra-day points are combined.
    """
    # Midnight stamps for Jan 1-5, then two extra stamps on the 5th so that
    # the last day carries three data points (4, 10 and 20).
    stamps = [datetime(1975, 1, day) for day in range(1, 6)]
    stamps += [datetime(1975, 1, 5, 1), datetime(1975, 1, 5, 2)]
    values = list(range(0, 5)) + [10, 20]

    # Float values, in chronological order.
    series = Series(values, index=stamps).map(float).sort_index()

    daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]

    # (extra keyword arguments, expected value for Jan 5)
    cases = (
        ({}, 20.0),                                    # default how is max
        ({'how': 'median'}, 10.0),                     # median of 4, 10, 20
        ({'how': 'mean'}, (4.0 + 10.0 + 20.0) / 3.0),  # mean of 4, 10, 20
    )
    for extra_kwargs, last_value in cases:
        expected = Series([0.0, 1.0, 2.0, 3.0, last_value], index=daily_index)
        result = mom.rolling_max(series, window=1, freq='D', **extra_kwargs)
        assert_series_equal(expected, result)
def test_rolling_max_how_resample(self):
    """``rolling_max`` with ``freq='D'`` must combine same-day points via ``how``."""
    days = range(1, 6)
    timestamps = [datetime(1975, 1, d) for d in days]
    # Two additional stamps on Jan 5 give that day the values 4, 10 and 20.
    timestamps.extend([datetime(1975, 1, 5, 1), datetime(1975, 1, 5, 2)])

    raw = Series(list(range(0, 5)) + [10, 20], index=timestamps)
    # Work on floats, sorted chronologically.
    series = raw.map(float).sort_index()

    def check(last_day_value, **how_kwargs):
        # Only the Jan 5 entry varies between the cases below.
        expected = Series([0.0, 1.0, 2.0, 3.0, last_day_value],
                          index=[datetime(1975, 1, d, 0) for d in days])
        result = mom.rolling_max(series, window=1, freq='D', **how_kwargs)
        assert_series_equal(expected, result)

    # Default how should be max.
    check(20.0)
    # Median of (4, 10, 20).
    check(10.0, how='median')
    # Mean of (4, 10, 20).
    check((4.0 + 10.0 + 20.0) / 3.0, how='mean')
def find_peak_ind(data, width, width_roll_mean=200, roll_max_peaks_threshold=4.0,
                  is_ret_roll_max_peaks=False):
    """
    Calculate the indices of isolated maxima in the data array, usually
    containing the result of a correlation calculation between a timeseries
    and a pattern.

    Parameters
    ----------
    data : 1d ndarray
        Timeseries, usually containing the result of a correlation
        calculation between a timeseries and a pattern.
    width : int
        The width of an interval in which the maximum is found. I.e. two
        maxima have to be at least width apart to be registered as separate.
    width_roll_mean : int
        Width intended for a rolling-mean normalisation of the data.
        Currently unused: the normalisation is disabled (the divisor is
        fixed at 1). The parameter is kept so existing callers keep working.
    roll_max_peaks_threshold : float
        The threshold the rolling maximum has to exceed for a position to be
        considered as a peak candidate. Because the rolling-mean
        normalisation is disabled, this is compared against the rolling
        maximum of ``data / 1``, i.e. the un-normalised amplitude.
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks or not. Default is not.

    Returns
    -------
    peak_inds : list
        List of indices of the peaks in data.
    roll_max_peaks : ndarray, if is_ret_roll_max_peaks
        Non-centered rolling maximum of the (currently un-normalised) data.
    """
    # NOTE(review): the divisor is pinned to 1, so no rolling-mean
    # normalisation takes place. To re-enable it, replace this constant with
    # mom.rolling_mean(data, width_roll_mean, center=True) and re-tune the
    # threshold -- confirm the intended behaviour before doing so.
    roll_mean = 1
    normalised = data / roll_mean

    roll_max_peaks = mom.rolling_max(normalised, width, center=False)
    # -- Calculate the centered rolling max.
    roll_max_peaks_c = mom.rolling_max(normalised, width, center=True)

    roll_peak_inds, = np.nonzero(roll_max_peaks > roll_max_peaks_threshold)

    # Two rolling-max values closer than this are treated as equal.
    tol = 0.0001

    peak_inds = []
    # The first and last candidates are skipped, which keeps c - 1 and c + 1
    # inside the array for every candidate that is examined.
    for c in roll_peak_inds[1:-1]:
        # -- max is when left entry in roll_max_peaks is smaller and right is
        # equal and the centered rolling max one step to the left already
        # holds the same value.
        if (roll_max_peaks[c - 1] < roll_max_peaks[c]
                and np.abs(roll_max_peaks[c] - roll_max_peaks[c + 1]) < tol
                and np.abs(roll_max_peaks[c] - roll_max_peaks_c[c - 1]) < tol):
            peak_inds.append(c)

    if is_ret_roll_max_peaks:
        return peak_inds, roll_max_peaks
    return peak_inds
def find_peak_ind(data, width, width_roll_mean=200, roll_max_peaks_threshold=4.0,
                  is_ret_roll_max_peaks=False):
    """
    Calculate the indices of isolated maxima in the data array, usually
    containing the result of a correlation calculation between a timeseries
    and a pattern.

    Parameters
    ----------
    data : 1d ndarray
        Timeseries, usually containing the result of a correlation
        calculation between a timeseries and a pattern.
    width : int
        The width of an interval in which the maximum is found. I.e. two
        maxima have to be at least width apart to be registered as separate.
    width_roll_mean : int
        Width intended for a rolling-mean normalisation of the data.
        Currently unused: the normalisation is disabled (the divisor is
        fixed at 1). The parameter is kept so existing callers keep working.
    roll_max_peaks_threshold : float
        The threshold the rolling maximum has to exceed for a position to be
        considered as a peak candidate. Because the rolling-mean
        normalisation is disabled, this is compared against the rolling
        maximum of ``data / 1``, i.e. the un-normalised amplitude.
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks or not. Default is not.

    Returns
    -------
    peak_inds : list
        List of indices of the peaks in data.
    roll_max_peaks : ndarray, if is_ret_roll_max_peaks
        Non-centered rolling maximum of the (currently un-normalised) data.
    """
    # NOTE(review): the divisor is pinned to 1, so no rolling-mean
    # normalisation takes place. To re-enable it, replace this constant with
    # mom.rolling_mean(data, width_roll_mean, center=True) and re-tune the
    # threshold -- confirm the intended behaviour before doing so.
    roll_mean = 1
    normalised = data / roll_mean

    roll_max_peaks = mom.rolling_max(normalised, width, center=False)
    # -- Calculate the centered rolling max.
    roll_max_peaks_c = mom.rolling_max(normalised, width, center=True)

    roll_peak_inds, = np.nonzero(roll_max_peaks > roll_max_peaks_threshold)

    # Two rolling-max values closer than this are treated as equal.
    tol = 0.0001

    peak_inds = []
    # The first and last candidates are skipped, which keeps c - 1 and c + 1
    # inside the array for every candidate that is examined.
    for c in roll_peak_inds[1:-1]:
        # -- max is when left entry in roll_max_peaks is smaller and right is
        # equal and the centered rolling max one step to the left already
        # holds the same value.
        if (roll_max_peaks[c - 1] < roll_max_peaks[c]
                and np.abs(roll_max_peaks[c] - roll_max_peaks[c + 1]) < tol
                and np.abs(roll_max_peaks[c] - roll_max_peaks_c[c - 1]) < tol):
            peak_inds.append(c)

    if is_ret_roll_max_peaks:
        return peak_inds, roll_max_peaks
    return peak_inds
def test_rolling_max(self):
    """Exercise ``mom.rolling_max`` against ``np.max`` plus two window edge cases."""
    # Generic comparison of the rolling function with its numpy counterpart.
    self._check_moment_func(mom.rolling_max, np.max)

    # A window far larger than the data with min_periods=1: for this
    # increasing input the result is expected to equal the input itself.
    increasing = np.array([1, 2, 3, 4, 5])
    rolled = mom.rolling_max(increasing, window=100, min_periods=1)
    assert_almost_equal(increasing, rolled)

    # min_periods larger than the window must be rejected.
    too_short = np.array([1, 2, 3])
    self.assertRaises(ValueError, mom.rolling_max, too_short,
                      window=3, min_periods=5)
def test_rolling_functions_window_non_shrinkage(self):
    """Rolling functions must return all-NaN output shaped like the input.

    The inputs have only 4 rows while every call uses ``window=10`` and
    ``min_periods=5``, so each result is expected to be entirely NaN.
    """
    # GH 7764
    kw = dict(window=10, min_periods=5)

    s = Series(range(4))
    df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])

    s_expected = Series(np.nan, index=s.index)
    df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
    df_expected_panel = Panel(items=df.index, major_axis=df.columns,
                              minor_axis=df.columns)

    # Functions whose result has the same type as their input.
    same_shape_funcs = [
        lambda x: mom.rolling_cov(x, x, pairwise=False, **kw),
        lambda x: mom.rolling_corr(x, x, pairwise=False, **kw),
        lambda x: mom.rolling_max(x, **kw),
        lambda x: mom.rolling_min(x, **kw),
        lambda x: mom.rolling_sum(x, **kw),
        lambda x: mom.rolling_mean(x, **kw),
        lambda x: mom.rolling_std(x, **kw),
        lambda x: mom.rolling_var(x, **kw),
        lambda x: mom.rolling_skew(x, **kw),
        lambda x: mom.rolling_kurt(x, **kw),
        lambda x: mom.rolling_quantile(x, quantile=0.5, **kw),
        lambda x: mom.rolling_median(x, **kw),
        lambda x: mom.rolling_apply(x, func=sum, **kw),
        lambda x: mom.rolling_window(x, win_type='boxcar', **kw),
    ]
    for func in same_shape_funcs:
        try:
            assert_series_equal(func(s), s_expected)
            assert_frame_equal(func(df), df_expected)
        except ImportError:
            # scipy needed for rolling_window
            continue

    # Pairwise variants produce a Panel when given a DataFrame.
    pairwise_funcs = [
        lambda x: mom.rolling_cov(x, x, pairwise=True, **kw),
        lambda x: mom.rolling_corr(x, x, pairwise=True, **kw),
        # rolling_corr_pairwise is deprecated, so the following line should
        # be deleted when rolling_corr_pairwise is removed.
        lambda x: mom.rolling_corr_pairwise(x, x, **kw),
    ]
    for func in pairwise_funcs:
        assert_panel_equal(func(df), df_expected_panel)
def test_rolling_max_gh6297(self):
    """Replicate result expected in GH #6297"""
    # Midnight on Jan 1-5, 1975, plus a second data point at 06:00 on Jan 3
    # so that one of the days holds two observations.
    stamps = [datetime(1975, 1, day) for day in range(1, 6)]
    stamps.append(datetime(1975, 1, 3, 6, 0))

    # Float values, ordered chronologically.
    series = Series(range(1, 7), index=stamps).map(float).sort_index()

    daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
    # Jan 3 holds the values 3.0 and 6.0; the daily result keeps 6.0.
    expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=daily_index)

    result = mom.rolling_max(series, window=1, freq="D")
    assert_series_equal(expected, result)
def test_rolling_max_gh6297(self):
    """Replicate result expected in GH #6297"""
    days = range(1, 6)

    # One stamp per day, then an extra one on Jan 3 at 06:00 so that day
    # has 2 datapoints.
    timestamps = [datetime(1975, 1, d) for d in days]
    timestamps += [datetime(1975, 1, 3, 6, 0)]

    raw = Series(range(1, 7), index=timestamps)
    # Use floats instead of ints as values
    as_float = raw.map(float)
    # Sort chronologically
    ordered = as_float.sort_index()

    actual = mom.rolling_max(ordered, window=1, freq='D')
    expected = Series(
        [1.0, 2.0, 6.0, 4.0, 5.0],
        index=[datetime(1975, 1, d, 0) for d in days],
    )
    assert_series_equal(expected, actual)
def test_rolling_functions_window_non_shrinkage(self):
    """Every rolling function is expected to yield an all-NaN result here.

    The data has 4 rows; all calls use ``window=10, min_periods=5``.
    """
    # GH 7764
    def call(name, obj, n_positional=1, **extra):
        # The attribute is looked up on ``mom`` at call time, i.e. inside the
        # caller's try block. ``obj`` is passed ``n_positional`` times
        # (twice for the cov/corr functions).
        func = getattr(mom, name)
        return func(*([obj] * n_positional), window=10, min_periods=5, **extra)

    s = Series(range(4))
    s_expected = Series(np.nan, index=s.index)
    df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
    df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
    df_expected_panel = Panel(items=df.index, major_axis=df.columns,
                              minor_axis=df.columns)

    # (function name, number of positional data arguments, extra keywords)
    same_shape_specs = [
        ('rolling_cov', 2, {'pairwise': False}),
        ('rolling_corr', 2, {'pairwise': False}),
        ('rolling_max', 1, {}),
        ('rolling_min', 1, {}),
        ('rolling_sum', 1, {}),
        ('rolling_mean', 1, {}),
        ('rolling_std', 1, {}),
        ('rolling_var', 1, {}),
        ('rolling_skew', 1, {}),
        ('rolling_kurt', 1, {}),
        ('rolling_quantile', 1, {'quantile': 0.5}),
        ('rolling_median', 1, {}),
        ('rolling_apply', 1, {'func': sum}),
        ('rolling_window', 1, {'win_type': 'boxcar'}),
    ]
    for name, n_positional, extra in same_shape_specs:
        try:
            s_result = call(name, s, n_positional, **extra)
            assert_series_equal(s_result, s_expected)

            df_result = call(name, df, n_positional, **extra)
            assert_frame_equal(df_result, df_expected)
        except ImportError:
            # scipy needed for rolling_window
            continue

    pairwise_specs = [
        ('rolling_cov', {'pairwise': True}),
        ('rolling_corr', {'pairwise': True}),
        # rolling_corr_pairwise is deprecated, so the following entry should
        # be deleted when rolling_corr_pairwise is removed.
        ('rolling_corr_pairwise', {}),
    ]
    for name, extra in pairwise_specs:
        df_result_panel = call(name, df, 2, **extra)
        assert_panel_equal(df_result_panel, df_expected_panel)
def aggregated_line_seeds(results, title):
    """Plot the rolling mean of the errors over time with a min/max band.

    ``results`` is sorted by element 1 of each row; column 1 is divided by
    60 twice and used as the x axis ("Time (h)"), column 2 is used as the
    error. When the module-level ``is_regression`` flag is set, the errors
    are log10-transformed first. The figure is written to
    ``$AUTOWEKA_PATH/plots<suffix>/trajectories-<title>.aggregated.png``,
    where ``suffix`` is a module-level name.
    """
    plt.close()

    window = 5
    band_style = dict(facecolor='gray', interpolate=True, alpha=0.5)

    points = np.array(sorted(results, key=itemgetter(1)))
    hours = points[:, 1] / 60 / 60
    errors = points[:, 2]
    if is_regression:
        errors = np.log10(errors)

    rolling_avg = stats.rolling_mean(errors, window)
    rolling_hi = stats.rolling_max(errors, window)
    rolling_lo = stats.rolling_min(errors, window)

    plt.plot(hours, rolling_avg, color="red", label="Rolling mean")
    # Shade from the mean up to the rolling max, then from the rolling min
    # up to the mean.
    for lower, upper in ((rolling_avg, rolling_hi), (rolling_lo, rolling_avg)):
        plt.fill_between(hours, lower, upper, **band_style)

    plt.xlabel("Time (h)")
    if not is_regression:
        plt.ylabel("% class. error")
        plt.ylim(0, 100)
    else:
        plt.ylabel("log(RMSE)")

    plt.margins(0.05, 0.05)
    plt.title(title)

    target = "%s/plots%s/trajectories-%s.aggregated.png" % (
        os.environ['AUTOWEKA_PATH'], suffix, title)
    plt.savefig(target, bbox_inches='tight')
def hhv(s, n):
    """Return ``moments.rolling_max(s, n)``: the rolling maximum of ``s``.

    ``s`` is the data and ``n`` the window passed through unchanged.
    NOTE(review): the name presumably stands for "highest high value" --
    confirm against callers.
    """
    highest = moments.rolling_max(s, n)
    return highest
# ax.set_xlabel('Time (h)') # ax.set_ylabel('RMSE') # ax.set_yscale('log') # ax.set_xlim(0,30) # colors = sns.color_palette("husl", 25) # for i in range(0,25): # ax.scatter(time_by_seed[i], error_by_seed[i], c=cm.hsv(i/25.,1), s=[30]*len(time_by_seed[i])) # ax.scatter(time_by_seed[i], error_by_seed[i], c=[colors[i]]*len(time_by_seed[i]), s=[30]*len(time_by_seed[i])) ax1.set_xlabel('Time (h)') ax1.set_ylabel('RMSE') ax1.set_xlim(-1, 30) y_mean = stats.rolling_mean(sorted_errors, 5) y_std = stats.rolling_std(sorted_errors, 5) # y_upper = y_mean + 2*y_std y_upper = stats.rolling_max(sorted_errors, 5) # y_lower = y_mean - 2*y_std y_lower = stats.rolling_min(sorted_errors, 5) sorted_data = DataFrame(data=sorted_points, columns=['time', 'binned_time', 'error', 'seed']) # sns.jointplot("binned_time", "error", sorted_data) # ax1.scatter(sorted_binned_time, sorted_errors) ax1.plot(sorted_time, y_mean, color="red", label="Rolling mean") # ax1.errorbar(sorted_binned_time, sorted_errors, marker='o', ms=8, yerr=3*y_std, ls='dotted', label="Rolling mean") ax1.legend() ax1.fill_between(sorted_time, y_mean, y_upper, facecolor='gray', interpolate=True, alpha=0.5) ax1.fill_between(sorted_time, y_lower, y_mean, facecolor='gray', interpolate=True, alpha=0.5) if not os.path.isdir("plots"): os.mkdir("plots") #fig.savefig("plots/points.png", bbox_inches='tight') fig.savefig("%s/plots%s/points-%s.png" % (os.environ['AUTOWEKA_PATH'], suffix, title), bbox_inches='tight') # plt.show()
# ax.set_xlabel('Time (h)') # ax.set_ylabel('RMSE') # ax.set_yscale('log') # ax.set_xlim(0,30) # colors = sns.color_palette("husl", 25) # for i in range(0,25): # ax.scatter(time_by_seed[i], error_by_seed[i], c=cm.hsv(i/25.,1), s=[30]*len(time_by_seed[i])) # ax.scatter(time_by_seed[i], error_by_seed[i], c=[colors[i]]*len(time_by_seed[i]), s=[30]*len(time_by_seed[i])) ax1.set_xlabel('Time (h)') ax1.set_ylabel('RMSE') ax1.set_xlim(-1, 30) y_mean = stats.rolling_mean(sorted_errors, 5) y_std = stats.rolling_std(sorted_errors, 5) # y_upper = y_mean + 2*y_std y_upper = stats.rolling_max(sorted_errors, 5) # y_lower = y_mean - 2*y_std y_lower = stats.rolling_min(sorted_errors, 5) sorted_data = DataFrame(data=sorted_points, columns=['time', 'binned_time', 'error', 'seed']) # sns.jointplot("binned_time", "error", sorted_data) # ax1.scatter(sorted_binned_time, sorted_errors) ax1.plot(sorted_time, y_mean, color="red", label="Rolling mean") # ax1.errorbar(sorted_binned_time, sorted_errors, marker='o', ms=8, yerr=3*y_std, ls='dotted', label="Rolling mean") ax1.legend() ax1.fill_between(sorted_time, y_mean, y_upper, facecolor='gray', interpolate=True, alpha=0.5)
# start of this month.
current_date = start_date = datetime.date(2010, 7, 1)
day = datetime.timedelta(days=1)
end_date = datetime.date.today()

# Collect one batch of records per day from start_date up to (but not
# including) today.
pv_data = []
for i in range((end_date - start_date).days):
    try:
        html = html_for_day(current_date)
    except urllib2.URLError:
        # Best effort: a day that cannot be downloaded is skipped.
        continue
    finally:
        # Runs on success and on the ``continue`` above, so the date
        # advances exactly once per iteration.
        current_date = current_date + day

    tree = ElementTree()
    tree.parse(handle_illformed_xml(html))
    items = tree.iterfind('item')
    pv_data.extend(map(item_to_tuple, items))

pv = DataFrame.from_records(pv_data, index='timestamp',
                            columns=["timestamp", "expost"])

fig = pyplot.figure()
# NOTE(review): 4*24*30 looks like 30 days of quarter-hourly samples --
# confirm the sampling interval of the source data.
rolling_max(pv, 4*24*30).plot(rot=15)
pyplot.title("Solar Photovoltaic production in Amprion's network Germany")
pyplot.ylabel("Rolling month maximum power produced [MW]")
pyplot.xlabel("Date")
pyplot.grid()
pyplot.savefig('ampiron.png')