def initial_trend_values_test(self):
    """Check the initial trend computed for a six-entry series with season length 4."""
    observations = [
        [0, 362.0], [1, 385.0], [2, 432.0],
        [3, 341.0], [4, 382.0], [5, 425.0],
    ]
    series = TimeSeries.from_twodim_list(observations)
    method = HoltWintersMethod(seasonLength=4)

    initial_trend = method.initialTrendSmoothingFactors(series)
    assert initial_trend == 7.5, "Initial Trend should be 7.5 but is %f" % initial_trend
def smoothing_test(self):
    """Test if the smoothing works correctly.

    Checks the initial level ``computeA(2, ...)``, the initial trend and the
    complete smoothed series against previously recorded values.
    """
    data = [
        362.0, 385.0, 432.0, 341.0, 382.0, 409.0, 498.0, 387.0,
        473.0, 513.0, 582.0, 474.0, 544.0, 582.0, 681.0, 557.0,
        628.0, 707.0, 773.0, 592.0, 627.0, 725.0, 854.0, 661.0,
    ]
    tsSrc = TimeSeries.from_twodim_list(zip(range(len(data)), data))
    expected = [
        [0.0, 362.0], [1.0, 379.93673257607463], [2.0, 376.86173719924875],
        [3.0, 376.0203652542205], [4.0, 408.21988583215574], [5.0, 407.16235446485433],
        [6.0, 430.0950666716297], [7.0, 429.89797609228435], [8.0, 489.4888959723074],
        [9.0, 507.8407281475308], [10.0, 506.3556647249702], [11.0, 523.9422448655133],
        [12.0, 556.0311543025242], [13.0, 573.6520991970604], [14.0, 590.2149136780341],
        [15.0, 611.8813425659495], [16.0, 637.0393967524727], [17.0, 684.6600411792656],
        [18.0, 675.9589298142507], [19.0, 659.0266828674846], [20.0, 644.0903317144154],
        [21.0, 690.4507762388047], [22.0, 735.3219292023371], [23.0, 737.9752345691215],
    ]

    hwm = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=0)

    # The failure messages use %f: %d would truncate the float and could print
    # a value that looks identical to the expected one.
    initialA_2 = hwm.computeA(2, tsSrc)
    assert initialA_2 == 510.5, "Third initial A_2 should be 510.5, but is %f" % initialA_2

    initialTrend = hwm.initialTrendSmoothingFactors(tsSrc)
    assert initialTrend == 9.75, "Initial Trend should be 9.75 but is %f" % initialTrend

    # correctness is not proven, but will be enough for regression testing
    resTS = tsSrc.apply(hwm)
    expectedTS = TimeSeries.from_twodim_list(expected)

    assert len(resTS) == len(expectedTS)
    assert resTS == expectedTS, "Smoothing result not correct."
def initial_trend_values_test(self):
    """Verify that the initial trend of the sample series equals 7.5."""
    sample = [[0, 362.0], [1, 385.0], [2, 432.0], [3, 341.0], [4, 382.0], [5, 425.0]]
    source_series = TimeSeries.from_twodim_list(sample)

    # Season length 4 on six entries.
    smoother = HoltWintersMethod(seasonLength=4)
    computed = smoother.initialTrendSmoothingFactors(source_series)

    assert computed == 7.5, "Initial Trend should be 7.5 but is %f" % computed
def season_factor_initialization_test(self):
    """Test if seasonal correction factors are initialized correctly."""
    observations = [
        [0, 362.0], [1, 385.0], [2, 432.0], [3, 341.0],
        [4, 382.0], [5, 409.0], [6, 498.0], [7, 387.0],
        [8, 473.0], [9, 513.0], [10, 582.0], [11, 474.0],
    ]
    # Recorded output; correctness is not proven, but it is enough for
    # regression testing.
    recorded_factors = [
        0.9302895649920525,
        0.9980629019785198,
        1.1551483413078523,
        0.9164991917215755,
    ]

    series = TimeSeries.from_twodim_list(observations)
    method = HoltWintersMethod(seasonLength=4)
    computed_factors = method.initSeasonFactors(series)

    assert computed_factors == recorded_factors, "Season Values are not initialized correctly"  # pragma: no cover
def season_length_test(self):
    """Test that the season length has to be greater than 0."""
    # Every length from -4 up to and including 0 has to be rejected.
    for rejected_length in range(-4, 1):
        try:
            HoltWintersMethod(seasonLength=rejected_length)
        except ValueError:
            continue
        assert False    # pragma: no cover

    # A spread of positive lengths has to be accepted without an exception.
    for accepted_length in range(1, 12414, 412):
        HoltWintersMethod(seasonLength=accepted_length)
def initialization_test(self):
    """Test the initialization of the HoltWintersMethod method."""
    # A construction that has to succeed.
    HoltWintersMethod(0.2, 0.3, 0.4, 5)

    # Every combination below is expected to be rejected with a ValueError.
    rejected_combinations = [
        (alpha, beta, gamma)
        for alpha in [-0.1, 0.81, 1.1]
        for beta in [-1.4, 0.12, 3.2]
        for gamma in [-0.05, 1.3]
    ]

    for alpha, beta, gamma in rejected_combinations:
        try:
            HoltWintersMethod(alpha, beta, gamma)
        except ValueError:
            continue
        assert False    # pragma: no cover
def season_factor_initialization_test(self):
    """Check the seasonal correction factors computed by initSeasonFactors."""
    sample = [[0, 362.0], [1, 385.0], [2, 432.0], [3, 341.0],
              [4, 382.0], [5, 409.0], [6, 498.0], [7, 387.0],
              [8, 473.0], [9, 513.0], [10, 582.0], [11, 474.0]]
    source_series = TimeSeries.from_twodim_list(sample)

    smoother = HoltWintersMethod(seasonLength=4)
    factors = smoother.initSeasonFactors(source_series)

    # The reference values were recorded from an earlier run: their
    # correctness is not proven, but they are enough for regression testing.
    reference = [0.9302895649920525, 0.9980629019785198,
                 1.1551483413078523, 0.9164991917215755]
    assert factors == reference, "Season Values are not initialized correctly"  # pragma: no cover
def optimize(request):
    """
    Performs Holt Winters Parameter Optimization on the given post data.

    Expects the following values set in the post of the request:
        seasonLength - integer (defaults to 6)
        valuesToForecast - integer (defaults to 0)
        data - JSON encoded two dimensional array of [timestamp, value]
               (defaults to an empty array)

    Returns an ``itty.Response`` whose JSON body contains the optimal
    parameters, the original series, the smoothed series and the error
    rounded to three digits.
    """
    # Parse arguments
    seasonLength = int(request.POST.get('seasonLength', 6))
    valuesToForecast = int(request.POST.get('valuesToForecast', 0))
    # The fallback has to be a JSON document: json.loads() only accepts a
    # string, so a list default would raise a TypeError when 'data' is missing.
    data = json.loads(request.POST.get('data', '[]'))

    original = TimeSeries.from_twodim_list(data)
    original.normalize("day")  # due to bug in TimeSeries.apply
    original.set_timeformat("%d.%m")

    # optimize smoothing
    hwm = HoltWintersMethod(seasonLength=seasonLength, valuesToForecast=valuesToForecast)
    gridSearch = GridSearch(SMAPE)
    optimal_forecasting, error, optimal_params = gridSearch.optimize(original, [hwm])

    # perform smoothing
    smoothed = optimal_forecasting.execute(original)
    smoothed.set_timeformat("%d.%m")

    result = {
        'params': optimal_params,
        'original': original,
        'smoothed': smoothed,
        'error': round(error.get_error(), 3),
    }
    return itty.Response(json.dumps(result, cls=PycastEncoder), content_type='application/json')
def forecasting_test(self):
    """Check smoothing plus a four-value forecast against recorded results."""
    data = [
        362.0, 385.0, 432.0, 341.0, 382.0, 409.0, 498.0, 387.0,
        473.0, 513.0, 582.0, 474.0, 544.0, 582.0, 681.0, 557.0,
        628.0, 707.0, 773.0, 592.0, 627.0, 725.0, 854.0, 661.0,
    ]
    source_series = TimeSeries.from_twodim_list(zip(range(len(data)), data))

    # Recorded values for the 24 smoothed entries followed by 4 forecast entries.
    recorded_values = [
        362.0, 379.93673257607463, 376.86173719924875, 376.0203652542205,
        408.21988583215574, 407.16235446485433, 430.0950666716297, 429.89797609228435,
        489.4888959723074, 507.8407281475308, 506.3556647249702, 523.9422448655133,
        556.0311543025242, 573.6520991970604, 590.2149136780341, 611.8813425659495,
        637.0393967524727, 684.6600411792656, 675.9589298142507, 659.0266828674846,
        644.0903317144154, 690.4507762388047, 735.3219292023371, 737.9752345691215,
        669.767091965978, 737.5272444120604, 805.3947787747426, 902.1522777060334,
    ]
    # Timestamps are the consecutive floats 0.0 .. 27.0.
    expected = [[float(step), value] for step, value in enumerate(recorded_values)]

    method = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=4)
    result = source_series.apply(method)

    assert len(result) == len(source_series) + 4
    assert result == TimeSeries.from_twodim_list(expected)
def sanity_test(self):
    """HoltWinters should throw an Exception if applied to a Time Series shorter than the season length"""
    # One entry only, while the season length is 2.
    short_series = TimeSeries.from_twodim_list([[0.0, 152]])
    method = HoltWintersMethod(seasonLength=2)

    try:
        short_series.apply(method)
    except ValueError:
        return

    assert False, "HoltWinters should throw an Exception if applied to a Time Series shorter than the season length"  # pragma: no cover
def smoothing_test(self):
    """Test if the smoothing works correctly.

    The initial level, the initial trend and the full smoothed series are
    compared with values recorded from an earlier run.
    """
    data = [362.0, 385.0, 432.0, 341.0, 382.0, 409.0, 498.0, 387.0,
            473.0, 513.0, 582.0, 474.0, 544.0, 582.0, 681.0, 557.0,
            628.0, 707.0, 773.0, 592.0, 627.0, 725.0, 854.0, 661.0]
    tsSrc = TimeSeries.from_twodim_list(zip(range(len(data)), data))

    expected = [[0.0, 362.0], [1.0, 379.93673257607463], [2.0, 376.86173719924875],
                [3.0, 376.0203652542205], [4.0, 408.21988583215574], [5.0, 407.16235446485433],
                [6.0, 430.0950666716297], [7.0, 429.89797609228435], [8.0, 489.4888959723074],
                [9.0, 507.8407281475308], [10.0, 506.3556647249702], [11.0, 523.9422448655133],
                [12.0, 556.0311543025242], [13.0, 573.6520991970604], [14.0, 590.2149136780341],
                [15.0, 611.8813425659495], [16.0, 637.0393967524727], [17.0, 684.6600411792656],
                [18.0, 675.9589298142507], [19.0, 659.0266828674846], [20.0, 644.0903317144154],
                [21.0, 690.4507762388047], [22.0, 735.3219292023371], [23.0, 737.9752345691215]]

    hwm = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=0)

    # Report the actual values with %f; %d would cut off the fractional part
    # and hide small deviations from the expected value.
    initialA_2 = hwm.computeA(2, tsSrc)
    assert initialA_2 == 510.5, "Third initial A_2 should be 510.5, but is %f" % initialA_2

    initialTrend = hwm.initialTrendSmoothingFactors(tsSrc)
    assert initialTrend == 9.75, "Initial Trend should be 9.75 but is %f" % initialTrend

    # correctness is not proven, but will be enough for regression testing
    resTS = tsSrc.apply(hwm)
    expectedTS = TimeSeries.from_twodim_list(expected)

    assert len(resTS) == len(expectedTS)
    assert resTS == expectedTS, "Smoothing result not correct."
def holtWinters(request):
    """
    Performs Holt Winters Smoothing on the given post data.

    Expects the following values set in the post of the request:
        smoothingFactor - float (defaults to 0.2)
        trendSmoothingFactor - float (defaults to 0.3)
        seasonSmoothingFactor - float (defaults to 0.4)
        seasonLength - integer (defaults to 6)
        valuesToForecast - integer (defaults to 0)
        data - JSON encoded two dimensional array of [timestamp, value]
               (defaults to an empty array)

    Returns a ``Response`` whose JSON body contains the original series, the
    smoothed series and the SMAPE error rounded to three digits.
    """
    # Parse arguments
    smoothingFactor = float(request.POST.get('smoothingFactor', 0.2))
    trendSmoothingFactor = float(request.POST.get('trendSmoothingFactor', 0.3))
    seasonSmoothingFactor = float(request.POST.get('seasonSmoothingFactor', 0.4))
    seasonLength = int(request.POST.get('seasonLength', 6))
    valuesToForecast = int(request.POST.get('valuesToForecast', 0))
    # json.loads() needs a string, so the fallback is the JSON text '[]'
    # instead of a Python list (which would raise a TypeError).
    data = json.loads(request.POST.get('data', '[]'))

    # perform smoothing
    hwm = HoltWintersMethod(
        smoothingFactor=smoothingFactor,
        trendSmoothingFactor=trendSmoothingFactor,
        seasonSmoothingFactor=seasonSmoothingFactor,
        seasonLength=seasonLength,
        valuesToForecast=valuesToForecast,
    )
    original = TimeSeries.from_twodim_list(data)
    original.set_timeformat("%d.%m")
    smoothed = hwm.execute(original)
    smoothed.set_timeformat("%d.%m")

    error = SMAPE()
    error.initialize(original, smoothed)

    # process the result
    result = {
        'original': original,
        'smoothed': smoothed,
        'error': round(error.get_error(), 3),
    }
    return Response(json.dumps(result, cls=PycastEncoder), content_type='application/json')
def holtWinters(request):
    """
    Performs Holt Winters Smoothing on the given post data.

    Expects the following values set in the post of the request:
        smoothingFactor - float (defaults to 0.2)
        trendSmoothingFactor - float (defaults to 0.3)
        seasonSmoothingFactor - float (defaults to 0.4)
        seasonLength - integer (defaults to 6)
        valuesToForecast - integer (defaults to 0)
        data - JSON encoded two dimensional array of [timestamp, value]
               (defaults to an empty array)

    Returns an ``itty.Response`` whose JSON body contains the original
    series, the smoothed series and the SMAPE error rounded to three digits.
    """
    # Parse arguments
    post = request.POST
    smoothingFactor = float(post.get('smoothingFactor', 0.2))
    trendSmoothingFactor = float(post.get('trendSmoothingFactor', 0.3))
    seasonSmoothingFactor = float(post.get('seasonSmoothingFactor', 0.4))
    seasonLength = int(post.get('seasonLength', 6))
    valuesToForecast = int(post.get('valuesToForecast', 0))
    # The default is the JSON string '[]': handing json.loads() a list
    # raises a TypeError whenever the 'data' field is absent.
    data = json.loads(post.get('data', '[]'))

    # perform smoothing
    hwm = HoltWintersMethod(
        smoothingFactor=smoothingFactor,
        trendSmoothingFactor=trendSmoothingFactor,
        seasonSmoothingFactor=seasonSmoothingFactor,
        seasonLength=seasonLength,
        valuesToForecast=valuesToForecast,
    )
    original = TimeSeries.from_twodim_list(data)
    original.set_timeformat("%d.%m")
    smoothed = hwm.execute(original)
    smoothed.set_timeformat("%d.%m")

    error = SMAPE()
    error.initialize(original, smoothed)

    # process the result
    result = {
        'original': original,
        'smoothed': smoothed,
        'error': round(error.get_error(), 3),
    }
    return itty.Response(json.dumps(result, cls=PycastEncoder), content_type='application/json')
def preset_season_factor_test(self):
    """Initial Season Factors should be presetable"""
    preset = [0, 1, 2, 3]
    method = HoltWintersMethod(seasonLength=4)
    method.set_parameter("seasonValues", preset)

    observations = [
        [0, 362.0], [1, 385.0], [2, 432.0], [3, 341.0],
        [4, 382.0], [5, 409.0], [6, 498.0], [7, 387.0],
        [8, 473.0], [9, 513.0], [10, 582.0], [11, 474.0],
    ]
    series = TimeSeries.from_twodim_list(observations)

    # With four preset factors and season length 4 the preset is returned.
    returned = method.initSeasonFactors(series)
    assert returned == preset, "Preset Season Factors are not returned by initSeasonFactors"

    # Only two factors for a season length of 4: an AssertionError is expected.
    method.set_parameter("seasonValues", preset[:2])
    try:
        method.initSeasonFactors(series)
    except AssertionError:
        return

    assert False, "If preset season factors and season length do not comply, initSeasonFactors should throw an AssertionError"  # pragma: no cover
def preset_season_factor_test(self):
    """Initial Season Factors should be presetable"""
    sample = [[0, 362.0], [1, 385.0], [2, 432.0], [3, 341.0],
              [4, 382.0], [5, 409.0], [6, 498.0], [7, 387.0],
              [8, 473.0], [9, 513.0], [10, 582.0], [11, 474.0]]
    source_series = TimeSeries.from_twodim_list(sample)

    season_factors = [0, 1, 2, 3]
    smoother = HoltWintersMethod(seasonLength=4)

    # A preset whose length equals the season length is handed back as is.
    smoother.set_parameter("seasonValues", season_factors)
    result = smoother.initSeasonFactors(source_series)
    assert result == season_factors, "Preset Season Factors are not returned by initSeasonFactors"

    # A preset of the wrong length has to trigger an AssertionError.
    smoother.set_parameter("seasonValues", season_factors[:2])
    try:
        smoother.initSeasonFactors(source_series)
    except AssertionError:
        return

    assert False, "If preset season factors and season length do not comply, initSeasonFactors should throw an AssertionError"  # pragma: no cover
def detect(self):
    """Forecast every configured CSV and collect columns with a large latest error.

    For each URL in ``self.config['input']['data']``:

    * the CSV is downloaded and its columns are cached by header name
      (rows whose values after the first column are all empty are skipped),
    * every column except ``'Day'`` is smoothed with a Holt-Winters method,
    * the forecasts and the per-entry errors are written through
      ``self.save_data_as_csv`` as ``<name>-forecast`` and ``<name>-error``,
    * headers whose error for the last observation is at least
      ``self.config['error_threshold']`` are recorded in an anomaly entry.

    Files that cannot be fetched or parsed are reported on stdout and skipped.

    NOTE(review): the original summary of this method reads "Detect anomalies
    and send an email."; the portion visible here only fills ``anomalies`` --
    confirm where that list is consumed.
    """
    anomalies = []
    for file_url in self.config['input']['data']:
        url_parts = file_url.rsplit('/', 1)
        file_name = url_parts[1].split('.')[0] if len(url_parts) == 2 else url_parts[0]
        try:
            response = urllib2.urlopen(file_url)
            try:
                reader = csv.reader(response)
                headers = next(reader)
                cache = {}
                forecasts = {}
                errors = {}
                for header in headers:
                    cache[header] = []
                    forecasts[header] = []
                for row in reader:
                    # store in cache only if the row is full or something other than null values
                    if any(row[1:]):
                        for i, column in enumerate(row):
                            cache[headers[i]].append(column)
            finally:
                # Always release the connection, even when parsing fails.
                response.close()
        except Exception as e:
            print('Error: Could not fetch the file %s: %s' % (file_url, e))
            continue

        observation_count = len(cache['Day'])
        for header, data in cache.items():
            if header == 'Day':
                continue
            # TODO: remove this, but without it `division by 0` error occurs below in es.execute
            data = [1 if x == '0' else x for x in data]
            ts = TimeSeries.from_twodim_list(zip(cache['Day'], data), '%Y-%m-%d')

            # Calculate forecasts
            # The values of alpha, beta, and gamma below are not set in stone.
            # But they are producing good results now.
            # Feel free to change if the script results in wrong anomalies.
            # TODO: maybe allow alpha, beta, gamma, and period to be configurable?
            es = HoltWintersMethod(smoothingFactor=.9,
                                   trendSmoothingFactor=0.5,
                                   seasonSmoothingFactor=0.1,
                                   seasonLength=7)  # 7 days seems reasonable, no?
            forecast = es.execute(ts)
            forecasts[header] = [int(x[1]) if isinstance(x[1], float) else x[1]
                                 for x in forecast[:observation_count]]

            # Calculate difference between forecast and actual data
            mape = MeanAbsolutePercentageError()
            errors[header] = []
            for j, value in enumerate(ts):
                local_error = mape.local_error([value[1]], [forecast[j][1]])
                errors[header].append(int(local_error) if isinstance(local_error, float) else local_error)

        # order columns
        forecast_values = []
        error_values = []
        for header in headers:
            if header == 'Day':
                forecast_values.append(cache['Day'])
                error_values.append(cache['Day'])
            else:
                forecast_values.append(forecasts[header])
                error_values.append(errors[header])

        # save to csv
        self.save_data_as_csv('%s-forecast' % file_name, headers, list(izip_longest(*forecast_values)))
        self.save_data_as_csv('%s-error' % file_name, headers, list(izip_longest(*error_values)))

        # # Graph - install matplotlib ;)
        # forecast_line, = plt.plot(forecasts['Home'], label='Forecast')
        # actual_line, = plt.plot(cache['Home'], label='Actual')
        # plt.legend(handles=[forecast_line, actual_line])
        # plt.savefig("graph.png")

        # Select errors that are bigger than the threshold
        anomaly = {
            'day': cache['Day'][observation_count - 1],
            'graph': file_name,
            'headers': []
        }
        for header, error in errors.items():
            if len(error) >= observation_count and error[observation_count - 1] >= self.config['error_threshold']:
                anomaly['headers'].append(header)
        anomalies.append(anomaly)