Example #1
0
    def initial_trend_values_test(self):
        """Check the initial trend computed for a six-value series (season length 4)."""
        hwm = HoltWintersMethod(seasonLength=4)
        values = [362.0, 385.0, 432.0, 341.0, 382.0, 425.0]
        # Timestamps are simply the positions of the values.
        tsSrc = TimeSeries.from_twodim_list(
            [[position, value] for position, value in enumerate(values)]
        )

        trend = hwm.initialTrendSmoothingFactors(tsSrc)
        assert trend == 7.5, "Initial Trend should be 7.5 but is %f" % trend
Example #2
0
    def smoothing_test(self):
        """Test if the smoothing works correctly.

        Checks two intermediate values (A_2 and the initial trend) and then
        compares the complete smoothed series against stored results.
        """

        data = [
            362.0, 385.0, 432.0, 341.0, 382.0, 409.0, 498.0, 387.0, 473.0,
            513.0, 582.0, 474.0, 544.0, 582.0, 681.0, 557.0, 628.0, 707.0,
            773.0, 592.0, 627.0, 725.0, 854.0, 661.0
        ]
        # Timestamps are the positions of the values within ``data``.
        tsSrc = TimeSeries.from_twodim_list(list(enumerate(data)))
        expected = [[0.0, 362.0], [1.0, 379.93673257607463],
                    [2.0, 376.86173719924875], [3.0, 376.0203652542205],
                    [4.0, 408.21988583215574], [5.0, 407.16235446485433],
                    [6.0, 430.0950666716297], [7.0, 429.89797609228435],
                    [8.0, 489.4888959723074], [9.0, 507.8407281475308],
                    [10.0, 506.3556647249702], [11.0, 523.9422448655133],
                    [12.0, 556.0311543025242], [13.0, 573.6520991970604],
                    [14.0, 590.2149136780341], [15.0, 611.8813425659495],
                    [16.0, 637.0393967524727], [17.0, 684.6600411792656],
                    [18.0, 675.9589298142507], [19.0, 659.0266828674846],
                    [20.0, 644.0903317144154], [21.0, 690.4507762388047],
                    [22.0, 735.3219292023371], [23.0, 737.9752345691215]]
        hwm = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=0)

        # The failure messages use %f: the checked values are floats and %d
        # would silently truncate them (510.5 would be reported as 510).
        initialA_2 = hwm.computeA(2, tsSrc)
        assert initialA_2 == 510.5, "Third initial A_2 should be 510.5, but is %f" % initialA_2

        initialTrend = hwm.initialTrendSmoothingFactors(tsSrc)
        assert initialTrend == 9.75, "Initial Trend should be 9.75 but is %f" % initialTrend

        #correctness is not proven, but will be enough for regression testing
        resTS = tsSrc.apply(hwm)
        expectedTS = TimeSeries.from_twodim_list(expected)

        assert len(resTS) == len(expectedTS)
        assert resTS == expectedTS, "Smoothing result not correct."
Example #3
0
    def initial_trend_values_test(self):
        """The initial trend of the sample series must come out as 7.5."""
        hwm = HoltWintersMethod(seasonLength=4)
        samples = [
            [0, 362.0],
            [1, 385.0],
            [2, 432.0],
            [3, 341.0],
            [4, 382.0],
            [5, 425.0],
        ]
        series = TimeSeries.from_twodim_list(samples)

        trend = hwm.initialTrendSmoothingFactors(series)
        failure_message = "Initial Trend should be 7.5 but is %f" % trend
        assert trend == 7.5, failure_message
Example #4
0
    def season_factor_initialization_test(self):
        """Seasonal correction factors must match the stored regression values."""

        hwm = HoltWintersMethod(seasonLength=4)
        values = [
            362.0, 385.0, 432.0, 341.0,
            382.0, 409.0, 498.0, 387.0,
            473.0, 513.0, 582.0, 474.0,
        ]
        # Each value is paired with its position as the timestamp.
        tsSrc = TimeSeries.from_twodim_list(
            [[position, value] for position, value in enumerate(values)]
        )
        seasonValues = hwm.initSeasonFactors(tsSrc)

        # Not a proof of correctness; the numbers only guard against regressions.
        expectedFactors = [
            0.9302895649920525,
            0.9980629019785198,
            1.1551483413078523,
            0.9164991917215755,
        ]
        assert seasonValues == expectedFactors, "Season Values are not initialized correctly"    # pragma: no cover
Example #5
0
    def season_length_test(self):
        """Season lengths below 1 must raise ValueError; positive ones are accepted."""
        # Every length from -4 up to and including 0 has to be rejected.
        for invalidLength in range(-4, 1):
            rejected = False
            try:
                HoltWintersMethod(seasonLength=invalidLength)
            except ValueError:
                rejected = True
            assert rejected

        # A sample of positive lengths must construct without raising.
        for validLength in range(1, 12414, 412):
            HoltWintersMethod(seasonLength=validLength)
Example #6
0
    def initialization_test(self):
        """Constructor accepts a valid parameter set and rejects the listed invalid ones."""
        HoltWintersMethod(0.2, 0.3, 0.4, 5)

        alphas = [-0.1, 0.81, 1.1]
        betas = [-1.4, 0.12, 3.2]
        gammas = [-0.05, 1.3]
        # Same iteration order as three nested loops (alpha outermost).
        combinations = [
            (alpha, beta, gamma)
            for alpha in alphas
            for beta in betas
            for gamma in gammas
        ]

        # Each listed combination is expected to raise ValueError.
        for alpha, beta, gamma in combinations:
            rejected = False
            try:
                HoltWintersMethod(alpha, beta, gamma)
            except ValueError:
                rejected = True
            assert rejected
Example #7
0
    def season_factor_initialization_test(self):
        """Verify the initial seasonal factors for three seasons of length 4."""

        hwm = HoltWintersMethod(seasonLength=4)
        firstSeason = [[0, 362.0], [1, 385.0], [2, 432.0], [3, 341.0]]
        secondSeason = [[4, 382.0], [5, 409.0], [6, 498.0], [7, 387.0]]
        thirdSeason = [[8, 473.0], [9, 513.0], [10, 582.0], [11, 474.0]]
        tsSrc = TimeSeries.from_twodim_list(firstSeason + secondSeason + thirdSeason)

        computedFactors = hwm.initSeasonFactors(tsSrc)

        # Regression values only; their correctness has not been proven.
        regressionFactors = [
            0.9302895649920525, 0.9980629019785198, 1.1551483413078523,
            0.9164991917215755
        ]
        assert computedFactors == regressionFactors, "Season Values are not initialized correctly"  # pragma: no cover
Example #8
0
def optimize(request):
    """
    Performs Holt Winters Parameter Optimization on the given post data.
    Expects the following values set in the post of the request:
        seasonLength - integer (defaults to 6)
        valuesToForecast - integer (defaults to 0)
        data - JSON encoded two dimensional array of [timestamp, value]
               (defaults to an empty array)

    Returns an itty.Response whose JSON body holds the keys 'params',
    'original', 'smoothed' and 'error' (rounded to three decimals).
    """
    #Parse arguments
    post = request.POST
    seasonLength = int(post.get('seasonLength', 6))
    valuesToForecast = int(post.get('valuesToForecast', 0))
    # The fallback must be a JSON *string*: json.loads() raises a TypeError
    # when handed a list, which is what happened whenever 'data' was missing.
    data = json.loads(post.get('data', '[]'))

    original = TimeSeries.from_twodim_list(data)
    original.normalize("day") #due to bug in TimeSeries.apply
    original.set_timeformat("%d.%m")

    #optimize smoothing: GridSearch scores the candidates with SMAPE
    hwm = HoltWintersMethod(seasonLength=seasonLength,
                            valuesToForecast=valuesToForecast)
    gridSearch = GridSearch(SMAPE)
    optimal_forecasting, error, optimal_params = gridSearch.optimize(original, [hwm])

    #perform smoothing with the method returned by the search
    smoothed = optimal_forecasting.execute(original)
    smoothed.set_timeformat("%d.%m")

    result = {
        'params': optimal_params,
        'original': original,
        'smoothed': smoothed,
        'error': round(error.get_error(), 3),
    }
    return itty.Response(json.dumps(result, cls=PycastEncoder), content_type='application/json')
Example #9
0
    def forecasting_test(self):
        """Regression check: smoothing plus four forecasted values match stored results."""
        observations = [
            362.0, 385.0, 432.0, 341.0, 382.0, 409.0,
            498.0, 387.0, 473.0, 513.0, 582.0, 474.0,
            544.0, 582.0, 681.0, 557.0, 628.0, 707.0,
            773.0, 592.0, 627.0, 725.0, 854.0, 661.0,
        ]
        # Pair every observation with its position as the timestamp.
        timestamps = range(len(observations))
        tsSrc = TimeSeries.from_twodim_list(zip(timestamps, observations))

        # Smoothed values for the 24 observations ...
        smoothedPart = [
            [0.0, 362.0], [1.0, 379.93673257607463],
            [2.0, 376.86173719924875], [3.0, 376.0203652542205],
            [4.0, 408.21988583215574], [5.0, 407.16235446485433],
            [6.0, 430.0950666716297], [7.0, 429.89797609228435],
            [8.0, 489.4888959723074], [9.0, 507.8407281475308],
            [10.0, 506.3556647249702], [11.0, 523.9422448655133],
            [12.0, 556.0311543025242], [13.0, 573.6520991970604],
            [14.0, 590.2149136780341], [15.0, 611.8813425659495],
            [16.0, 637.0393967524727], [17.0, 684.6600411792656],
            [18.0, 675.9589298142507], [19.0, 659.0266828674846],
            [20.0, 644.0903317144154], [21.0, 690.4507762388047],
            [22.0, 735.3219292023371], [23.0, 737.9752345691215],
        ]
        # ... followed by the four forecasted values.
        forecastPart = [
            [24.0, 669.767091965978], [25.0, 737.5272444120604],
            [26.0, 805.3947787747426], [27.0, 902.1522777060334],
        ]

        hwm = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=4)
        res = tsSrc.apply(hwm)

        assert len(res) == len(tsSrc) + 4
        expectedTS = TimeSeries.from_twodim_list(smoothedPart + forecastPart)
        assert res == expectedTS
Example #10
0
 def sanity_test(self):
     """Applying HoltWinters to a series shorter than the season length must raise ValueError."""
     hwm = HoltWintersMethod(seasonLength=2)
     # A single entry, i.e. fewer values than one season of length 2.
     tsSrc = TimeSeries.from_twodim_list([[0.0, 152]])

     raisedValueError = False
     try:
         tsSrc.apply(hwm)
     except ValueError:
         raisedValueError = True

     assert raisedValueError, "HoltWinters should throw an Exception if applied to a Time Series shorter than the season length"  # pragma: no cover
Example #11
0
    def smoothing_test(self):
        """Test if the smoothing works correctly.

        Verifies A_2 and the initial trend first, then compares the whole
        smoothed series with stored regression values.
        """

        data = [
            362.0, 385.0, 432.0, 341.0, 382.0, 409.0, 498.0, 387.0,
            473.0, 513.0, 582.0, 474.0, 544.0, 582.0, 681.0, 557.0,
            628.0, 707.0, 773.0, 592.0, 627.0, 725.0, 854.0, 661.0,
        ]
        # Timestamps are the positions of the values within ``data``.
        tsSrc = TimeSeries.from_twodim_list(list(enumerate(data)))
        expected = [
            [0.0, 362.0],
            [1.0, 379.93673257607463],
            [2.0, 376.86173719924875],
            [3.0, 376.0203652542205],
            [4.0, 408.21988583215574],
            [5.0, 407.16235446485433],
            [6.0, 430.0950666716297],
            [7.0, 429.89797609228435],
            [8.0, 489.4888959723074],
            [9.0, 507.8407281475308],
            [10.0, 506.3556647249702],
            [11.0, 523.9422448655133],
            [12.0, 556.0311543025242],
            [13.0, 573.6520991970604],
            [14.0, 590.2149136780341],
            [15.0, 611.8813425659495],
            [16.0, 637.0393967524727],
            [17.0, 684.6600411792656],
            [18.0, 675.9589298142507],
            [19.0, 659.0266828674846],
            [20.0, 644.0903317144154],
            [21.0, 690.4507762388047],
            [22.0, 735.3219292023371],
            [23.0, 737.9752345691215],
        ]
        hwm = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=0)

        # %f instead of %d: both checked values are floats, and %d would
        # truncate them in the failure message (510.5 would print as 510).
        initialA_2 = hwm.computeA(2, tsSrc)
        assert initialA_2 == 510.5, "Third initial A_2 should be 510.5, but is %f" % initialA_2

        initialTrend = hwm.initialTrendSmoothingFactors(tsSrc)
        assert initialTrend == 9.75, "Initial Trend should be 9.75 but is %f" % initialTrend

        #correctness is not proven, but will be enough for regression testing
        resTS = tsSrc.apply(hwm)
        expectedTS = TimeSeries.from_twodim_list(expected)

        assert len(resTS) == len(expectedTS)
        assert resTS == expectedTS, "Smoothing result not correct."
Example #12
0
def holtWinters(request):
	"""
	Performs Holt Winters Smoothing on the given post data.
	Expects the following values set in the post of the request:
		smoothingFactor - float (defaults to 0.2)
		trendSmoothingFactor - float (defaults to 0.3)
		seasonSmoothingFactor - float (defaults to 0.4)
		seasonLength - integer (defaults to 6)
		valuesToForecast - integer (defaults to 0)
		data - JSON encoded two dimensional array of [timestamp, value]
		       (defaults to an empty array)

	Returns a Response whose JSON body holds the keys 'original',
	'smoothed' and 'error' (SMAPE, rounded to three decimals).
	"""
	#Parse arguments
	post = request.POST
	smoothingFactor = float(post.get('smoothingFactor', 0.2))
	trendSmoothingFactor = float(post.get('trendSmoothingFactor', 0.3))
	seasonSmoothingFactor = float(post.get('seasonSmoothingFactor', 0.4))
	seasonLength = int(post.get('seasonLength', 6))
	valuesToForecast = int(post.get('valuesToForecast', 0))
	# The fallback must be a JSON *string*: json.loads() raises a TypeError
	# when handed a list, which is what happened whenever 'data' was missing.
	data = json.loads(post.get('data', '[]'))

	#perform smoothing
	hwm = HoltWintersMethod(
		smoothingFactor=smoothingFactor,
		trendSmoothingFactor=trendSmoothingFactor,
		seasonSmoothingFactor=seasonSmoothingFactor,
		seasonLength=seasonLength,
		valuesToForecast=valuesToForecast)
	original = TimeSeries.from_twodim_list(data)
	original.set_timeformat("%d.%m")
	smoothed = hwm.execute(original)
	smoothed.set_timeformat("%d.%m")

	# Error measure between the original and the smoothed series.
	error = SMAPE()
	error.initialize(original, smoothed)

	#process the result
	result = {
		'original': original,
		'smoothed': smoothed,
		'error': round(error.get_error(), 3),
	}
	return Response(json.dumps(result, cls=PycastEncoder), content_type='application/json')
Example #13
0
def holtWinters(request):
    """
    Performs Holt Winters Smoothing on the given post data.
    Expects the following values set in the post of the request:
        smoothingFactor - float (defaults to 0.2)
        trendSmoothingFactor - float (defaults to 0.3)
        seasonSmoothingFactor - float (defaults to 0.4)
        seasonLength - integer (defaults to 6)
        valuesToForecast - integer (defaults to 0)
        data - JSON encoded two dimensional array of [timestamp, value]
               (defaults to an empty array)

    Returns an itty.Response whose JSON body holds the keys 'original',
    'smoothed' and 'error' (SMAPE, rounded to three decimals).
    """
    #Parse arguments
    post = request.POST
    smoothingFactor = float(post.get('smoothingFactor', 0.2))
    trendSmoothingFactor = float(post.get('trendSmoothingFactor', 0.3))
    seasonSmoothingFactor = float(post.get('seasonSmoothingFactor', 0.4))
    seasonLength = int(post.get('seasonLength', 6))
    valuesToForecast = int(post.get('valuesToForecast', 0))
    # The fallback must be a JSON *string*: json.loads() raises a TypeError
    # when handed a list, which is what happened whenever 'data' was missing.
    data = json.loads(post.get('data', '[]'))

    #perform smoothing
    hwm = HoltWintersMethod(smoothingFactor=smoothingFactor,
                            trendSmoothingFactor=trendSmoothingFactor,
                            seasonSmoothingFactor=seasonSmoothingFactor,
                            seasonLength=seasonLength,
                            valuesToForecast=valuesToForecast)
    original = TimeSeries.from_twodim_list(data)
    original.set_timeformat("%d.%m")
    smoothed = hwm.execute(original)
    smoothed.set_timeformat("%d.%m")

    # Error measure between the original and the smoothed series.
    error = SMAPE()
    error.initialize(original, smoothed)

    #process the result
    result = {
        'original': original,
        'smoothed': smoothed,
        'error': round(error.get_error(), 3),
    }
    return itty.Response(json.dumps(result, cls=PycastEncoder), content_type='application/json')
Example #14
0
    def preset_season_factor_test(self):
        """Preset season factors are returned as-is; a wrong count must raise AssertionError."""
        hwm = HoltWintersMethod(seasonLength=4)
        factors = [0, 1, 2, 3]
        hwm.set_parameter("seasonValues", factors)

        values = [
            362.0, 385.0, 432.0, 341.0,
            382.0, 409.0, 498.0, 387.0,
            473.0, 513.0, 582.0, 474.0,
        ]
        # Each value is paired with its position as the timestamp.
        tsSrc = TimeSeries.from_twodim_list(
            [[position, value] for position, value in enumerate(values)]
        )

        returnedFactors = hwm.initSeasonFactors(tsSrc)
        assert returnedFactors == factors, "Preset Season Factors are not returned by initSeasonFactors"

        # Only two factors for a season length of four: this must be refused.
        hwm.set_parameter("seasonValues", factors[:2])
        mismatchDetected = False
        try:
            hwm.initSeasonFactors(tsSrc)
        except AssertionError:
            mismatchDetected = True

        assert mismatchDetected, "If preset season factors and season length do not comply, initSeasonFactors should throw an AssertionError"  # pragma: no cover
Example #15
0
    def preset_season_factor_test(self):
        """Season factors set via set_parameter must be honoured by initSeasonFactors."""
        hwm = HoltWintersMethod(seasonLength=4)
        factors = [0, 1, 2, 3]
        hwm.set_parameter("seasonValues", factors)

        firstSeason = [[0, 362.0], [1, 385.0], [2, 432.0], [3, 341.0]]
        secondSeason = [[4, 382.0], [5, 409.0], [6, 498.0], [7, 387.0]]
        thirdSeason = [[8, 473.0], [9, 513.0], [10, 582.0], [11, 474.0]]
        tsSrc = TimeSeries.from_twodim_list(firstSeason + secondSeason + thirdSeason)

        seasonValues = hwm.initSeasonFactors(tsSrc)
        assert seasonValues == factors, "Preset Season Factors are not returned by initSeasonFactors"

        # Fewer preset factors than the season length has to be rejected.
        tooFewFactors = factors[:2]
        hwm.set_parameter("seasonValues", tooFewFactors)
        rejected = False
        try:
            hwm.initSeasonFactors(tsSrc)
        except AssertionError:
            rejected = True

        assert rejected, "If preset season factors and season length do not comply, initSeasonFactors should throw an AssertionError"    # pragma: no cover
Example #16
0
    def detect(self):
        """Detect anomalies and send an email."""
        anomalies = []
        for file_url in self.config['input']['data']:
            url_parts = file_url.rsplit('/', 1)
            file_name = url_parts[1].split('.')[0] if len(url_parts) == 2 else url_parts[0]
            try:
                response = urllib2.urlopen(file_url)
                reader = csv.reader(response)
                headers = reader.next()
                cache = {}
                forecasts = {}
                errors = {}
                for header in headers:
                    cache[header] = []
                    forecasts[header] = []
                for row in reader:
                    # store in cache only if the row is full or something other than null values
                    if not all([not a for a in row[1:]]):
                        for i, column in enumerate(row):
                            cache[headers[i]].append(column)
            except Exception, e:
                print('Error: Could not fetch the file %s: %s' % (file_url, e))
                continue

            observation_count = len(cache['Day'])
            for header, data in cache.items():
                if header == 'Day':
                    continue

                # TODO: remove this, but without it `division by 0` error occurs below in es.execute
                data = [1 if x == '0' else x for x in data]
                ts = TimeSeries.from_twodim_list(zip(cache['Day'], data), '%Y-%m-%d')

                # Calculate forecasts
                # The values of alpha, beta, and gamma below are not set in stone.
                # But they are producing good results now.
                # Feel free to change if the script results in wrong anomalies.
                # TODO: maybe allow alpha, beta, gamma, and period to be configurable?
                es = HoltWintersMethod(smoothingFactor=.9, trendSmoothingFactor=0.5,
                                       seasonSmoothingFactor=0.1, seasonLength=7)  # 7 days seems reasonable, no?
                forecast = es.execute(ts)
                forecasts[header] = [int(x[1]) if type(x[1]) == float else x[1] for x in forecast[:observation_count]]

                # Calculate difference between forecast and actual data
                mape = MeanAbsolutePercentageError()
                errors[header] = []
                for j, value in enumerate(ts):
                    local_error = mape.local_error([value[1]], [forecast[j][1]])
                    errors[header].append(int(local_error) if type(local_error) == float else local_error)

            # order columns
            forecast_values = []
            error_values = []
            for header in headers:
                if header == 'Day':
                    forecast_values.append(cache['Day'])
                    error_values.append(cache['Day'])
                else:
                    forecast_values.append(forecasts[header])
                    error_values.append(errors[header])

            # save to csv
            self.save_data_as_csv('%s-forecast' % file_name, headers, list(izip_longest(*forecast_values)))
            self.save_data_as_csv('%s-error' % file_name, headers, list(izip_longest(*error_values)))

            # # Graph - install matplotlib ;)
            # forecast_line, = plt.plot(forecasts['Home'], label='Forecast')
            # actual_line, = plt.plot(cache['Home'], label='Actual')
            # plt.legend(handles=[forecast_line, actual_line])
            # plt.savefig("graph.png")

            # Select errors that are bigger than the threshold
            anomaly = {
                'day': cache['Day'][observation_count - 1],
                'graph': file_name,
                'headers': []
            }
            for header, error in errors.items():
                if len(error) >= observation_count and error[observation_count - 1] >= self.config['error_threshold']:
                    anomaly['headers'].append(header)

            anomalies.append(anomaly)