def double_initialize_test(self):
    """Test for the error occurring when the same error measure is initialized twice.

    ``local_error`` and ``_calculate`` of the BaseErrorMeasure instance are
    replaced by a stub so that the first ``initialize`` call succeeds. Every
    further ``initialize`` call is expected to raise an exception.
    """
    data = [[0.0, 0.0], [1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]]
    tsOrg = TimeSeries.from_twodim_list(data)
    tsCalc = TimeSeries.from_twodim_list(data)

    bem = BaseErrorMeasure()

    # remember the original methods so they can be restored at the end
    bem_calculate = bem._calculate
    bem_local_error = bem.local_error

    def return_zero(ignoreMe, ignoreMeToo):
        return 0

    # remove the NotImplementedErrors for initialization
    bem.local_error = return_zero
    bem._calculate = return_zero

    # correct initialize call
    bem.initialize(tsOrg, tsCalc)

    # incorrect initialize calls
    for _ in range(10):
        try:
            bem.initialize(tsOrg, tsCalc)
        except Exception:
            pass
        else:
            assert False    # pragma: no cover

    # restore each method to the attribute it was taken from
    bem.local_error = bem_local_error
    bem._calculate = bem_calculate
def setUp(self):
    """Create two three-entry TimeSeries and a MeanSignedDifferenceError initialized with them."""
    firstEntries = [[1.0, 1.0], [2.0, 20.0], [3.0, 3.0]]
    secondEntries = [[1.0, 10.0], [2.0, 2.0], [3.0, 30.0]]

    self.ts1 = TimeSeries.from_twodim_list(firstEntries)
    self.ts2 = TimeSeries.from_twodim_list(secondEntries)

    measure = MeanSignedDifferenceError()
    self.msd = measure
    measure.initialize(self.ts1, self.ts2)
def start_and_enddate_test(self):
    """Check get_error for accepted and rejected startDate/endDate values."""
    entries = [[0.0, 0.0], [1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]]
    original = TimeSeries.from_twodim_list(entries)
    calculated = TimeSeries.from_twodim_list(entries)

    measure = MeanSquaredError()
    measure.initialize(original, calculated)

    # these calls are expected to succeed
    for begin in [0.0, 1, 2, 3]:
        measure.get_error(startDate=begin, endDate=4)

    for finish in [1, 2, 3, 4]:
        measure.get_error(startDate=0.0, endDate=finish)

    # these calls have to be rejected with a ValueError
    for illegal in ({"startDate": 23}, {"endDate": -1}):
        try:
            measure.get_error(**illegal)
        except ValueError:
            continue
        assert False    # pragma: no cover
def execute(self, timeSeries):
    """Creates a new TimeSeries containing the SMA values for the predefined windowsize.

    :param TimeSeries timeSeries:    The TimeSeries used to calculate the simple moving average values.

    :return:    TimeSeries object containing the smooth moving average.
    :rtype:     TimeSeries

    :raise:    Raises a :py:exc:`ValueError` if the defined windowsize is larger than the number of elements
        in timeSeries

    :note:    This implementation aims to support independent for loop execution.
    """
    windowsize = self._parameters["windowsize"]
    tsLength = len(timeSeries)

    if tsLength < windowsize:
        raise ValueError("windowsize is larger than the number of elements in timeSeries.")

    res = TimeSeries()

    # one result entry per complete window that fits into the series
    for idx in range(tsLength - windowsize + 1):
        window = timeSeries[idx:idx + windowsize]

        # the result entry carries the timestamp found in the middle of the window
        timestamp = window[windowsize // 2][0]
        value = sum(entry[1] for entry in window) / windowsize

        res.add_entry(timestamp, value)

    res.sort_timeseries()
    return res
def test_confidence_interval(self):
    """Test the over- and underestimation returned for a desired confidence level.

    Given two timeseries and a desired confidence interval, the regression
    has to report the correct over and underestimation.
    """
    # materialize the pairs: the list is sliced below and used for ts_x as well
    data_x = list(zip(range(100), range(100)))
    overestimations = [[90, 90 - 1], [91, 91 - 3], [92, 92 - 1], [93, 93 - 40], [94, 94 - 1]]
    underestimations = [[95, 95 + 5], [96, 96 + 1], [97, 97 + 4], [98, 98 + 3], [99, 99 + 1]]
    data_y = data_x[:90] + overestimations + underestimations

    ts_x = TimeSeries.from_twodim_list(data_x)
    ts_y = TimeSeries.from_twodim_list(data_y)

    # Mock the random.sample method so that we can use our outliers as samples
    with patch('pycast.common.timeseries.random.sample') as sample_mock:
        sample_mock.return_value = underestimations + overestimations

        reg = Regression()
        n, m, error = reg.calculate_parameters_with_confidence(ts_x, ts_y, .6)

        # Since all values are the same the params should be n=0, m=1
        self.assertEqual(0, n)
        self.assertEqual(1, m)

        # assert under/overestimation
        self.assertEqual(error[0], -1)
        self.assertEqual(error[1], 3)
def double_initialize_test(self):
    """Test for the error occurring when the same error measure is initialized twice.

    ``local_error`` and ``_calculate`` of the BaseErrorMeasure instance are
    replaced by a stub so that the first ``initialize`` call succeeds. Every
    further ``initialize`` call is expected to raise an exception.
    """
    data = [[0.0, 0.0], [1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]]
    tsOrg = TimeSeries.from_twodim_list(data)
    tsCalc = TimeSeries.from_twodim_list(data)

    bem = BaseErrorMeasure()

    # remember the original methods so they can be restored at the end
    bem_calculate = bem._calculate
    bem_local_error = bem.local_error

    def return_zero(ignoreMe, ignoreMeToo):
        return 0

    # remove the NotImplementedErrors for initialization
    bem.local_error = return_zero
    bem._calculate = return_zero

    # correct initialize call
    bem.initialize(tsOrg, tsCalc)

    # incorrect initialize calls
    for _ in range(10):
        try:
            bem.initialize(tsOrg, tsCalc)
        # Exception instead of StandardError, which only exists on Python 2
        except Exception:
            pass
        else:
            assert False    # pragma: no cover

    # restore each method to the attribute it was taken from
    bem.local_error = bem_local_error
    bem._calculate = bem_calculate
def initialization_test(self):
    """Test for MASE initialization.

    Checks that one historic mean exists per error value, that a second
    initialize call is rejected, and that a float historyLength (20.0 and
    40.0) ends up as a history of 4 and 8 entries for the 20 entry series.
    """
    dataOrg = [[1.0, 10], [2.0, 12], [3.0, 14], [4.0, 13], [5.0, 17], [6.0, 20], [7.0, 23], [8.0, 26], [9.0, 29], [10.0, 31], [11.0, 26], [12.0, 21], [13.0, 18], [14.0, 14], [15.0, 13], [16.0, 19], [17.0, 24], [18.0, 28], [19.0, 30], [20.0, 32]]
    dataFor = [[1.0, 11], [2.0, 13], [3.0, 14], [4.0, 11], [5.0, 13], [6.0, 18], [7.0, 20], [8.0, 26], [9.0, 21], [10.0, 34], [11.0, 23], [12.0, 23], [13.0, 15], [14.0, 12], [15.0, 14], [16.0, 17], [17.0, 25], [18.0, 22], [19.0, 14], [20.0, 30]]

    tsOrg = TimeSeries.from_twodim_list(dataOrg)
    tsFor = TimeSeries.from_twodim_list(dataFor)

    em = MeanAbsoluteScaledError(historyLength=5)
    em.initialize(tsOrg, tsFor)

    assert len(em._errorValues) == len(em._historicMeans), "For each error value an historic mean has to exsist."

    # a second initialization of the same instance has to fail
    try:
        em.initialize(tsOrg, tsFor)
    # Exception instead of StandardError, which only exists on Python 2
    except Exception:
        pass
    else:
        assert False    # pragma: no cover

    em = MeanAbsoluteScaledError(historyLength=20.0)
    em.initialize(tsOrg, tsFor)

    assert len(em._errorValues) == len(em._historicMeans), "For each error value an historic mean has to exsist."
    assert em._historyLength == 4, "The history is %s entries long. 4 were expected." % em._historyLength

    em = MeanAbsoluteScaledError(historyLength=40.0)
    em.initialize(tsOrg, tsFor)

    assert len(em._errorValues) == len(em._historicMeans), "For each error value an historic mean has to exsist."
    assert em._historyLength == 8, "The history is %s entries long. 8 were expected." % em._historyLength
def smoothing_test(self):
    """Test smoothing part of ExponentialSmoothing."""
    source = [[0, 10.0], [1, 18.0], [2, 29.0], [3, 15.0], [4, 30.0], [5, 30.0], [6, 12.0], [7, 16.0]]
    tsSrc = TimeSeries.from_twodim_list(source)
    tsSrc.normalize("second")

    # Expected result without a forecasted value.
    # The numbers look a little bit odd, based on the binary translation problem
    reference = [[1.5, 10.0], [2.5, 12.4], [3.5, 17.380000000000003], [4.5, 16.666], [5.5, 20.6662], [6.5, 23.46634], [7.5, 20.026438]]
    tsDst = TimeSeries.from_twodim_list(reference)

    # smoothing with an explicit second argument of 0
    smoothed = tsSrc.apply(ExponentialSmoothing(0.3, 0))
    identical = (smoothed == tsDst)
    if not identical:
        raise AssertionError

    # the default second argument yields one additional entry
    reference.append([8.5, 18.8185066])
    tsDst = TimeSeries.from_twodim_list(reference)

    smoothed = tsSrc.apply(ExponentialSmoothing(0.3))
    identical = (smoothed == tsDst)
    if not identical:
        raise AssertionError
def smoothing_test(self):
    """Test if the smoothing works correctly.

    Checks the value of ``computeA(2, ...)``, the initial trend and the
    smoothed series of a HoltWintersMethod with season length 4 and no
    forecasted values.
    """
    data = [362.0, 385.0, 432.0, 341.0, 382.0, 409.0, 498.0, 387.0, 473.0, 513.0, 582.0, 474.0, 544.0, 582.0, 681.0, 557.0, 628.0, 707.0, 773.0, 592.0, 627.0, 725.0, 854.0, 661.0]
    tsSrc = TimeSeries.from_twodim_list(zip(range(len(data)), data))
    expected = [[0.0, 362.0], [1.0, 379.93673257607463], [2.0, 376.86173719924875], [3.0, 376.0203652542205], [4.0, 408.21988583215574], [5.0, 407.16235446485433], [6.0, 430.0950666716297], [7.0, 429.89797609228435], [8.0, 489.4888959723074], [9.0, 507.8407281475308], [10.0, 506.3556647249702], [11.0, 523.9422448655133], [12.0, 556.0311543025242], [13.0, 573.6520991970604], [14.0, 590.2149136780341], [15.0, 611.8813425659495], [16.0, 637.0393967524727], [17.0, 684.6600411792656], [18.0, 675.9589298142507], [19.0, 659.0266828674846], [20.0, 644.0903317144154], [21.0, 690.4507762388047], [22.0, 735.3219292023371], [23.0, 737.9752345691215]]

    hwm = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=0)

    # %s instead of %d: the compared values are floats and must not be truncated in the message
    initialA_2 = hwm.computeA(2, tsSrc)
    assert initialA_2 == 510.5, "Third initial A_2 should be 510.5, but it is %s" % initialA_2

    initialTrend = hwm.initialTrendSmoothingFactors(tsSrc)
    assert initialTrend == 9.75, "Initial Trend should be 9.75 but is %s" % initialTrend

    # correctness is not proven, but will be enough for regression testing
    resTS = tsSrc.apply(hwm)
    expectedTS = TimeSeries.from_twodim_list(expected)

    assert len(resTS) == len(expectedTS)
    assert resTS == expectedTS, "Smoothing result not correct."
def smoothing_test(self):
    """Test smoothing part of ExponentialSmoothing."""
    rawEntries = [[0, 10.0], [1, 18.0], [2, 29.0], [3, 15.0], [4, 30.0], [5, 30.0], [6, 12.0], [7, 16.0]]
    tsSrc = TimeSeries.from_twodim_list(rawEntries)
    tsSrc.normalize("second")

    # Reference values for the run with an explicit second argument of 0.
    # The numbers look a little bit odd, based on the binary translation problem
    wanted = [
        [1.5, 10.0],
        [2.5, 12.4],
        [3.5, 17.380000000000003],
        [4.5, 16.666],
        [5.5, 20.6662],
        [6.5, 23.46634],
        [7.5, 20.026438],
    ]
    tsDst = TimeSeries.from_twodim_list(wanted)

    method = ExponentialSmoothing(0.3, 0)
    outcome = tsSrc.apply(method)
    if not outcome == tsDst:
        raise AssertionError

    # the run with the default second argument has one more expected entry
    wanted.append([8.5, 18.8185066])
    tsDst = TimeSeries.from_twodim_list(wanted)

    method = ExponentialSmoothing(0.3)
    outcome = tsSrc.apply(method)
    if not outcome == tsDst:
        raise AssertionError
def forecasting_test(self):
    """Test HoltWintersMethod with four forecasted values against stored reference values."""
    observations = [362.0, 385.0, 432.0, 341.0, 382.0, 409.0, 498.0, 387.0, 473.0, 513.0, 582.0, 474.0, 544.0, 582.0, 681.0, 557.0, 628.0, 707.0, 773.0, 592.0, 627.0, 725.0, 854.0, 661.0]
    tsSrc = TimeSeries.from_twodim_list(zip(range(len(observations)), observations))

    # one reference entry per observation ...
    smoothedPart = [[0.0, 362.0], [1.0, 379.93673257607463], [2.0, 376.86173719924875], [3.0, 376.0203652542205], [4.0, 408.21988583215574], [5.0, 407.16235446485433], [6.0, 430.0950666716297], [7.0, 429.89797609228435], [8.0, 489.4888959723074], [9.0, 507.8407281475308], [10.0, 506.3556647249702], [11.0, 523.9422448655133], [12.0, 556.0311543025242], [13.0, 573.6520991970604], [14.0, 590.2149136780341], [15.0, 611.8813425659495], [16.0, 637.0393967524727], [17.0, 684.6600411792656], [18.0, 675.9589298142507], [19.0, 659.0266828674846], [20.0, 644.0903317144154], [21.0, 690.4507762388047], [22.0, 735.3219292023371], [23.0, 737.9752345691215]]
    # ... followed by the four forecasted entries
    forecastPart = [[24.0, 669.767091965978], [25.0, 737.5272444120604], [26.0, 805.3947787747426], [27.0, 902.1522777060334]]
    reference = TimeSeries.from_twodim_list(smoothedPart + forecastPart)

    method = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=4)
    outcome = tsSrc.apply(method)

    assert len(outcome) == len(tsSrc) + 4
    assert outcome == reference
def calculate_parameters_one_empty_list_test(self):
    """Test for ValueError if one of the TimeSeries is empty."""
    filled = TimeSeries.from_twodim_list([[1, 12.34]])
    empty = TimeSeries.from_twodim_list([])

    regression = Regression()
    with self.assertRaises(ValueError):
        regression.calculate_parameters(filled, empty)
def initialization_test(self):
    """Test for MASE initialization."""
    original = [[1.0, 10], [2.0, 12], [3.0, 14], [4.0, 13], [5.0, 17], [6.0, 20], [7.0, 23], [8.0, 26], [9.0, 29], [10.0, 31], [11.0, 26], [12.0, 21], [13.0, 18], [14.0, 14], [15.0, 13], [16.0, 19], [17.0, 24], [18.0, 28], [19.0, 30], [20.0, 32]]
    forecast = [[1.0, 11], [2.0, 13], [3.0, 14], [4.0, 11], [5.0, 13], [6.0, 18], [7.0, 20], [8.0, 26], [9.0, 21], [10.0, 34], [11.0, 23], [12.0, 23], [13.0, 15], [14.0, 12], [15.0, 14], [16.0, 17], [17.0, 25], [18.0, 22], [19.0, 14], [20.0, 30]]

    tsOrg = TimeSeries.from_twodim_list(original)
    tsFor = TimeSeries.from_twodim_list(forecast)

    def check_historic_means(measure):
        # one historic mean is required per calculated error value
        assert len(measure._errorValues) == len(measure._historicMeans), "For each error value an historic mean has to exsist."

    mase = MeanAbsoluteScaledError(historyLength=5)
    mase.initialize(tsOrg, tsFor)
    check_historic_means(mase)

    # initializing the same instance again has to raise
    rejected = False
    try:
        mase.initialize(tsOrg, tsFor)
    except Exception:
        rejected = True
    assert rejected

    # float history lengths: 20.0 and 40.0 are expected to result in 4 and 8 entries
    mase = MeanAbsoluteScaledError(historyLength=20.0)
    mase.initialize(tsOrg, tsFor)
    check_historic_means(mase)
    assert mase._historyLength == 4, "The history is %s entries long. 4 were expected." % mase._historyLength

    mase = MeanAbsoluteScaledError(historyLength=40.0)
    mase.initialize(tsOrg, tsFor)
    check_historic_means(mase)
    assert mase._historyLength == 8, "The history is %s entries long. 8 were expected." % mase._historyLength
def test_confidence_interval(self):
    """Test the over- and underestimation returned for a desired confidence level.

    Given two timeseries and a desired confidence interval, the regression
    has to report the correct over and underestimation.
    """
    # a real list is required: data_x is sliced below and also used for ts_x
    data_x = list(zip(range(100), range(100)))
    overestimations = [[90, 90 - 1], [91, 91 - 3], [92, 92 - 1], [93, 93 - 40], [94, 94 - 1]]
    underestimations = [[95, 95 + 5], [96, 96 + 1], [97, 97 + 4], [98, 98 + 3], [99, 99 + 1]]
    data_y = data_x[:90] + overestimations + underestimations

    ts_x = TimeSeries.from_twodim_list(data_x)
    ts_y = TimeSeries.from_twodim_list(data_y)

    # Mock the random.sample method so that we can use our outliers as samples
    with patch('pycast.common.timeseries.random.sample') as sample_mock:
        sample_mock.return_value = underestimations + overestimations

        reg = Regression()
        n, m, error = reg.calculate_parameters_with_confidence(
            ts_x, ts_y, .6)

        # Since all values are the same the params should be n=0, m=1
        self.assertEqual(0, n)
        self.assertEqual(1, m)

        # assert under/overestimation
        self.assertEqual(error[0], -1)
        self.assertEqual(error[1], 3)
def train_target(self, data_list, model_list):
    """Run a SMAPE based grid search over model_list for the given data.

    :param data_list:    Entries of the form [date, value].
    :param model_list:    Handed unchanged to GridSearch.optimize.

    :return:    The first element of the result of GridSearch.optimize.
    """
    series = TimeSeries(isNormalized=True)
    for entry in data_list:
        series.add_entry(entry[0], entry[1])

    search = GridSearch(SMAPE)
    # only the first of the three returned values is needed
    best_method, _error, _params = search.optimize(series, model_list)
    return best_method
def timeseries___setitem___test(self):
    """Test TimeSeries.__setitem__"""
    entries = [[0.0, 0.0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3], [0.4, 0.4], [0.5, 0.5]]
    untouched = TimeSeries.from_twodim_list(entries)
    modified = TimeSeries.from_twodim_list(entries)

    # after overwriting the second entry both series must not be equal anymore
    modified[1] = [0.2, 0.4]

    stillEqual = (untouched == modified)
    if stillEqual:
        raise AssertionError
def list_serialization_formatfree_test(self):
    """Test the format free list serialization."""
    entries = [[0.0, 0.0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3], [0.4, 0.4], [0.5, 0.5]]
    source = TimeSeries.from_twodim_list(entries)

    # serialize to a list and build a second series from that list
    restored = TimeSeries.from_twodim_list(source.to_twodim_list())

    assert source == restored
def energy_data(request):
    """
        Loads timestamp and current of the Readings table and returns them as JSON.

        The readings are put into a TimeSeries that is normalized to days
        using the "sum" fusion method. NOTE(review): the query has no filter
        on a device, all rows of Readings are read.
    """
    cursor = db.cursor()
    readings = cursor.execute("""SELECT timestamp, current FROM Readings""")

    series = TimeSeries()
    series.initialize_from_sql_cursor(readings)
    series.normalize("day", fusionMethod="sum")

    payload = json.dumps(series, cls=PycastEncoder)
    return Response(payload, content_type='application/json')
def list_serialization_format_test(self):
    """Test the list serialization including time formatting instructions."""
    timeformat = "%Y-%m-%d_%H:%M:%S"
    data = [[0.0, 0.0], [1.0, 0.1], [2.0, 0.2], [3.0, 0.3], [4.0, 0.4], [5.0, 0.5]]
    tsOne = TimeSeries.from_twodim_list(data)

    # serialize with formatted timestamps
    tsOne.set_timeformat(timeformat)
    data = tsOne.to_twodim_list()

    # the format is passed positionally so the test does not depend on the keyword's name
    tsTwo = TimeSeries.from_twodim_list(data, timeformat)

    assert tsOne == tsTwo
def calculate_parameter_duplicate_dates_test(self):
    """Test for ValueError if dates in timeseries are not distinct"""
    # the second series contains the date 1 twice
    distinct = TimeSeries.from_twodim_list([[1, 12.23], [4, 23.34]])
    duplicated = TimeSeries.from_twodim_list([[1, 34.23], [1, 16.23]])

    regression = Regression()
    with self.assertRaises(ValueError):
        regression.calculate_parameters(distinct, duplicated)
def timeseries_sort_test(self):
    """Tests the sort_timeseries function."""
    entries = [[0.0, 0.0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3], [0.4, 0.4], [0.5, 0.5]]
    series = TimeSeries.from_twodim_list(entries)

    # call once with the default argument and once with False
    series.sort_timeseries()
    series.sort_timeseries(False)

    # sort a series that was created with isSorted=True
    presorted = TimeSeries(isSorted=True)
    presorted.sort_timeseries()
def list_serialization_format_test(self):
    """Test the list serialization including time formatting instructions."""
    entries = [[0.0, 0.0], [1.0, 0.1], [2.0, 0.2], [3.0, 0.3], [4.0, 0.4], [5.0, 0.5]]
    source = TimeSeries.from_twodim_list(entries)

    # serialize with formatted timestamps and read the result back in
    source.set_timeformat("%Y-%m-%d_%H:%M:%S")
    serialized = source.to_twodim_list()
    restored = TimeSeries.from_twodim_list(serialized, tsformat="%Y-%m-%d_%H:%M:%S")

    assert source == restored
def calculate_parameter_with_short_timeseries_test(self):
    """Test for ValueError if Timeseries has only one matching date"""
    # only the date 1 is part of both series
    longer = TimeSeries.from_twodim_list([[1, 12.23], [4, 23.34]])
    shorter = TimeSeries.from_twodim_list([[1, 34.23]])

    regression = Regression()
    with self.assertRaises(ValueError):
        regression.calculate_parameters(longer, shorter)
def calculate_parameters_without_match_test(self):
    """Test for ValueError, if the input timeseries have no matching dates"""
    # both series do not share a single date
    first = TimeSeries.from_twodim_list([[1, 12.42], [6, 12.32], [8, 12.45]])
    second = TimeSeries.from_twodim_list([[2, 32.45], [4, 23.12], [7, 65.34]])

    regression = Regression()
    with self.assertRaises(ValueError):
        regression.calculate_parameters(first, second)
def forecasting_test(self):
    """Test the forecast of HoltWintersMethod against stored reference values."""
    values = [362.0, 385.0, 432.0, 341.0, 382.0, 409.0, 498.0, 387.0, 473.0, 513.0, 582.0, 474.0, 544.0, 582.0, 681.0, 557.0, 628.0, 707.0, 773.0, 592.0, 627.0, 725.0, 854.0, 661.0]
    indexed = zip(range(len(values)), values)
    tsSrc = TimeSeries.from_twodim_list(indexed)

    # 24 entries for the given values plus 4 forecasted entries
    wanted = [
        [0.0, 362.0], [1.0, 379.93673257607463], [2.0, 376.86173719924875], [3.0, 376.0203652542205],
        [4.0, 408.21988583215574], [5.0, 407.16235446485433], [6.0, 430.0950666716297], [7.0, 429.89797609228435],
        [8.0, 489.4888959723074], [9.0, 507.8407281475308], [10.0, 506.3556647249702], [11.0, 523.9422448655133],
        [12.0, 556.0311543025242], [13.0, 573.6520991970604], [14.0, 590.2149136780341], [15.0, 611.8813425659495],
        [16.0, 637.0393967524727], [17.0, 684.6600411792656], [18.0, 675.9589298142507], [19.0, 659.0266828674846],
        [20.0, 644.0903317144154], [21.0, 690.4507762388047], [22.0, 735.3219292023371], [23.0, 737.9752345691215],
        [24.0, 669.767091965978], [25.0, 737.5272444120604], [26.0, 805.3947787747426], [27.0, 902.1522777060334],
    ]

    holtWinters = HoltWintersMethod(.7556, 0.0000001, .9837, 4, valuesToForecast=4)
    result = tsSrc.apply(holtWinters)

    assert len(result) == len(tsSrc) + 4
    assert result == TimeSeries.from_twodim_list(wanted)
def normalize_test(self):
    """Test timeseries normalization."""
    # the input has no entries between 2.0 and 5.1
    unnormalized = TimeSeries.from_twodim_list([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [5.1, 5.0]])
    # expected result of the normalization to seconds
    reference = TimeSeries.from_twodim_list([[0.5, 0.0], [1.5, 1.0], [2.5, 2.0], [3.5, 3.0], [4.5, 4.0], [5.5, 5.0]])

    unnormalized.normalize("second")

    if len(unnormalized) != len(reference):
        raise AssertionError

    matches = (unnormalized == reference)
    if not matches:
        raise AssertionError
def predict_test(self):
    """Test if given an independent timeseries and parameters the right prediction is done"""
    data1 = [[1, 1], [2, 2], [3, 3]]
    # expected values: 1 + 2 * x for every value x of data1
    data2 = [[1, 3], [2, 5], [3, 7]]

    ts1 = TimeSeries.from_twodim_list(data1)
    ts2 = TimeSeries.from_twodim_list(data2)

    reg = Regression()
    result = reg.predict(ts1, 1, 2)

    # assertEqual replaces the deprecated alias assertEquals
    self.assertEqual(ts2, result)
def addition_test(self):
    """Test the addition operator for TimeSeries instances."""
    left = [[0.0, 0.0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3], [0.4, 0.4], [0.5, 0.5]]
    right = [[0.0, 0.0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3], [0.4, 0.4], [0.5, 0.5]]

    # the expected sum holds the entries of both lists ordered by timestamp
    combined = sorted(left + right, key=lambda item: item[0])

    tsLeft = TimeSeries.from_twodim_list(left)
    tsRight = TimeSeries.from_twodim_list(right)
    tsCombined = TimeSeries.from_twodim_list(combined)

    matches = (tsCombined == tsLeft + tsRight)
    if not matches:
        raise AssertionError
def predict_test(self):
    """Test if given an independent timeseries and parameters the right prediction is done"""
    data1 = [[1, 1], [2, 2], [3, 3]]
    # expected values: 1 + 2 * x for every value x of data1
    data2 = [[1, 3], [2, 5], [3, 7]]

    ts1 = TimeSeries.from_twodim_list(data1)
    ts2 = TimeSeries.from_twodim_list(data2)

    reg = Regression()
    result = reg.predict(ts1, 1, 2)

    # assertEquals is a deprecated alias of assertEqual
    self.assertEqual(ts2, result)
def list_initialization_test(self):
    """Test TimeSeries initialization from a given list."""
    entries = [[0.0, 0.0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3], [0.4, 0.4], [0.5, 0.5]]

    # first series: filled entry by entry
    manual = TimeSeries()
    for timestamp, value in entries:
        manual.add_entry(timestamp, value)

    # second series: created from the complete list
    fromList = TimeSeries.from_twodim_list(entries)

    if len(manual) != len(fromList):
        raise AssertionError

    identical = (manual == fromList)
    if not identical:
        raise AssertionError
def check_normalization_test(self):
    """Check for check_normalization.

    A series with the timestamps 0 to 9 has to pass the check, the same
    series with two removed entries has to fail it.
    """
    # a real list is needed, because the data is copied by slicing and modified below
    dataOK = list(zip(range(10), [random.random() for _ in range(10)]))

    # removing entries leaves gaps between the timestamps
    dataNotOK = dataOK[:]
    del dataNotOK[2]
    del dataNotOK[7]

    tsOK = TimeSeries.from_twodim_list(dataOK)
    tsNotOK = TimeSeries.from_twodim_list(dataNotOK)

    assert tsOK._check_normalization()
    assert not tsNotOK._check_normalization()
def error_calculation_test(self):
    """Testing for the correct MASE calculation.

    History length is 5 in this test.
    """
    dataOrg = [[1.0, 10], [2.0, 12], [3.0, 14], [4.0, 13], [5.0, 17], [6.0, 20], [7.0, 23], [8.0, 26], [9.0, 29], [10.0, 31], [11.0, 26], [12.0, 21], [13.0, 18], [14.0, 14], [15.0, 13], [16.0, 19], [17.0, 24], [18.0, 28], [19.0, 30], [20.0, 32]]
    dataFor = [[1.0, 11], [2.0, 13], [3.0, 14], [4.0, 11], [5.0, 13], [6.0, 18], [7.0, 20], [8.0, 26], [9.0, 21], [10.0, 34], [11.0, 23], [12.0, 23], [13.0, 15], [14.0, 12], [15.0, 14], [16.0, 17], [17.0, 25], [18.0, 22], [19.0, 14], [20.0, 30]]
    # Abs. difference of consecutive original values:
    #                    2 2 1 4 3 3 3 3 2 5 5 3 4 1 6 5 4 2 2
    # Sum(History)       12 13 14 16 14 16 18 18 19 18 19 19 20 18 19
    # Mean(History)      # # # # # 2.4 2.6 2.8 3.2 2.8 3.2 3.6 3.6 3.8 3.6 3.8 3.8 4.0 3.6 3.8
    # AD                 3 0 8 3 3 2 3 2 1 2 1 6 16 2
    # Sum(AD)            3 3 11 14 17 19 22 24 25 27 28 34 50 52
    # MAD                3 1.5 3.666 3.5 3.4 3.166 3.142 3 2.777 2.7 2.545 2.833 3.571 3.714
    # MASE (0% - 100%)   1.25 0.625 1.527 1.458 1.416 1.319 1.309 1.25 1.157 1.125 1.06 1.18 1.602 1.547

    tsOrg = TimeSeries.from_twodim_list(dataOrg)
    tsFor = TimeSeries.from_twodim_list(dataFor)

    mase = MeanAbsoluteScaledError(historyLength=5)
    mase.initialize(tsOrg, tsFor)

    # expected errors; strings keep the trailing zero for the five character comparison
    expected = [1.25, 0.625, 1.527, 1.458, 1.416, 1.319, 1.309, 1.25, 1.157, 1.125, "1.060", "1.180", 1.602, 1.547]
    step = 100.0 / len(expected) + 0.2

    # check for error calculation depending on a specific end percentage
    for position, reference in enumerate(expected):
        # the percentage is limited to 100.0
        upperBound = min(step * (position + 1), 100.0)

        calculated = str(mase.get_error(endPercentage=upperBound))[:5]
        wanted = str(reference)[:5]

        assert calculated == wanted

    # check for error calculation depending on a specific end date
    for position, reference in enumerate(expected):
        lastDate = dataOrg[position + 6][0]

        calculated = str(mase.get_error(endDate=lastDate))[:5]
        wanted = str(reference)[:5]

        assert calculated == wanted, "%s != %s" % (calculated, wanted)

    mase.get_error(startDate=7.0)

    # a start date that is not part of the series has to raise a ValueError
    try:
        mase.get_error(startDate=42.23)
    except ValueError:
        return
    assert False    # pragma: no cover
def error_calculation_test(self):
    """Testing for the correct MASE calculation.

    History length is 5 in this test.
    """
    dataOrg = [[1.0, 10], [2.0, 12], [3.0, 14], [4.0, 13], [5.0, 17], [6.0, 20], [7.0, 23], [8.0, 26], [9.0, 29], [10.0, 31], [11.0, 26], [12.0, 21], [13.0, 18], [14.0, 14], [15.0, 13], [16.0, 19], [17.0, 24], [18.0, 28], [19.0, 30], [20.0, 32]]
    dataFor = [[1.0, 11], [2.0, 13], [3.0, 14], [4.0, 11], [5.0, 13], [6.0, 18], [7.0, 20], [8.0, 26], [9.0, 21], [10.0, 34], [11.0, 23], [12.0, 23], [13.0, 15], [14.0, 12], [15.0, 14], [16.0, 17], [17.0, 25], [18.0, 22], [19.0, 14], [20.0, 30]]
    ## Abs. difference of consecutive original values:
    ##                    2 2 1 4 3 3 3 3 2 5 5 3 4 1 6 5 4 2 2
    ## Sum(History)       12 13 14 16 14 16 18 18 19 18 19 19 20 18 19
    ## Mean(History)      ## ## ## ## ## 2.4 2.6 2.8 3.2 2.8 3.2 3.6 3.6 3.8 3.6 3.8 3.8 4.0 3.6 3.8
    ## AD                 3 0 8 3 3 2 3 2 1 2 1 6 16 2
    ## Sum(AD)            3 3 11 14 17 19 22 24 25 27 28 34 50 52
    ## MAD                3 1.5 3.666 3.5 3.4 3.166 3.142 3 2.777 2.7 2.545 2.833 3.571 3.714
    ## MASE (0% - 100%)   1.25 0.625 1.527 1.458 1.416 1.319 1.309 1.25 1.157 1.125 1.06 1.18 1.602 1.547

    tsOrg = TimeSeries.from_twodim_list(dataOrg)
    tsFor = TimeSeries.from_twodim_list(dataFor)

    historyLength = 5
    em = MeanAbsoluteScaledError(historyLength=historyLength)
    em.initialize(tsOrg, tsFor)

    ## check for error calculation depending on a specific endpoint
    ## (strings keep the trailing zero for the five character comparison)
    correctResult = [1.25, 0.625, 1.527, 1.458, 1.416, 1.319, 1.309, 1.25, 1.157, 1.125, "1.060", "1.180", 1.602, 1.547]
    percentage = 100.0 / len(correctResult) + 0.2

    ## range instead of the Python 2 only xrange
    for errVal in range(14):
        endPercentage = percentage * (errVal + 1)

        ## set maximum percentage
        if endPercentage > 100.0:
            endPercentage = 100.0

        calcErr = str(em.get_error(endPercentage=endPercentage))[:5]
        correctRes = str(correctResult[errVal])[:5]

        assert calcErr == correctRes

    for errVal in range(14):
        endDate = dataOrg[errVal + 6][0]

        calcErr = str(em.get_error(endDate=endDate))[:5]
        correctRes = str(correctResult[errVal])[:5]

        assert calcErr == correctRes, "%s != %s" % (calcErr, correctRes)

    em.get_error(startDate=7.0)

    ## a start date that is not part of the series has to raise a ValueError
    try:
        em.get_error(startDate=42.23)
    except ValueError:
        pass
    else:
        assert False    # pragma: no cover
def error_calculation_test(self):
    """Test the MdAPE error calculation."""
    original = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 8], [7.3, 5], [8, 0], [9, 10]]
    calculated = [[1, 3], [2, 5], [3, 0], [4, 3], [5, 5], [6.1, 6], [7, 3], [7.3, 5], [8, 0], [9, 9]]

    measure = MedianAbsolutePercentageError()
    measure.initialize(TimeSeries.from_twodim_list(original), TimeSeries.from_twodim_list(calculated))

    # error of the complete series
    overall = measure.get_error()
    assert overall == 62.5

    # error restricted by the two positional arguments 20.0 and 50.0
    partial = measure.get_error(20.0, 50.0)
    assert partial == 100.0
def optimize_value_error_test(self):
    """Test the optimize call."""
    optimizer = BaseOptimizationMethod(BaseErrorMeasure, precision=-3)
    method = BaseMethod()

    # a call with one forecasting method is expected to pass
    optimizer.optimize(TimeSeries(), [method])

    # an empty list of forecasting methods has to raise a ValueError
    try:
        optimizer.optimize(TimeSeries(), [])
    except ValueError:
        return
    assert False    # pragma: no cover
def is_normalized_test(self):
    """Test TimeSeries.is_normalized()."""
    flagged = TimeSeries(isNormalized=True)
    assert flagged.is_normalized()

    # an empty series is expected to be normalized, even if created with isNormalized=False
    series = TimeSeries(isNormalized=False)
    assert series.is_normalized()

    # entries with irregular, unordered timestamps
    for timestamp in (0.1, 0.4, 0.3):
        series.add_entry(timestamp, 3.2)

    state = series.is_normalized()
    assert False == state
def error_calculation_test(self):
    """Test the calculation of the MeanAbsolutePercentageError."""
    original = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 8], [7.3, 5], [8, 0], [9, 10]]
    calculated = [[1, 3], [2, 5], [3, 0], [4, 3], [5, 5], [6.1, 6], [7, 3], [7.3, 5], [8, 0], [9, 9]]
    # abs difference:     2      3      3      1      0       NA      5    0         NA      1
    # local errors:     200    150    100     25      0       NA   62,5    0         NA     10
    # sum:              547,5

    measure = MeanAbsolutePercentageError()
    measure.initialize(TimeSeries.from_twodim_list(original), TimeSeries.from_twodim_list(calculated))

    # only the first six characters of the result are compared
    shortened = str(measure.get_error())[:6]
    assert shortened == "68.437"
def error_calculation_test(self):
    """Test the calculation of the GeometricMeanAbsolutePercentageError."""
    original = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 8], [7.3, 5], [8, 0], [9, 10]]
    calculated = [[1, 3], [2, 5], [3, 0], [4, 3], [5, 6], [6.1, 6], [7, 3], [7.3, 5], [8, 0], [9, 9]]
    # abs difference:     2      3      3      1      1       NA      5    0         NA      1
    # local errors:     200    150    100     25     20       NA   62,5    0         NA     10
    # product:          937500000000

    measure = GeometricMeanAbsolutePercentageError()
    measure.initialize(TimeSeries.from_twodim_list(original), TimeSeries.from_twodim_list(calculated))

    # only the first six characters of the result are compared
    shortened = str(measure.get_error())[:6]
    assert shortened == "31.368"
def error_calculation_test(self):
    """Test the MdAPE error calculation."""
    tsOrg = TimeSeries.from_twodim_list(
        [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 8], [7.3, 5], [8, 0], [9, 10]])
    tsCalc = TimeSeries.from_twodim_list(
        [[1, 3], [2, 5], [3, 0], [4, 3], [5, 5], [6.1, 6], [7, 3], [7.3, 5], [8, 0], [9, 9]])

    mdape = MedianAbsolutePercentageError()
    mdape.initialize(tsOrg, tsCalc)

    # first without arguments, afterwards with the positional arguments 20.0 and 50.0
    completeError = mdape.get_error()
    assert completeError == 62.5

    limitedError = mdape.get_error(20.0, 50.0)
    assert limitedError == 100.0
def local_error_test(self):
    """Test local_error of BaseErrorMeasure."""
    entries = [[0.0, 0.0], [1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]]
    original = TimeSeries.from_twodim_list(entries)
    calculated = TimeSeries.from_twodim_list(entries)

    measure = BaseErrorMeasure()

    # local_error has to raise NotImplementedError for every pair of values
    for position in range(len(original)):
        notImplemented = False
        try:
            measure.local_error([original[position][1]], [calculated[position][1]])
        except NotImplementedError:
            notImplemented = True
        assert notImplemented
def local_error_test(self):
    """Test local_error of BaseErrorMeasure.

    The call is expected to raise a NotImplementedError for every pair of values.
    """
    data = [[0.0, 0.0], [1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]]
    tsOrg = TimeSeries.from_twodim_list(data)
    tsCalc = TimeSeries.from_twodim_list(data)

    bem = BaseErrorMeasure()

    # range instead of the Python 2 only xrange
    for idx in range(len(tsOrg)):
        try:
            bem.local_error([tsOrg[idx][1]], [tsCalc[idx][1]])
        except NotImplementedError:
            pass
        else:
            assert False    # pragma: no cover
def calculate_parameters_with_confidence(self, independentTs, dependentTs, confidenceLevel, samplePercentage=.1):
    """Calculate the regression parameters and the confidence interval for a given confidenceLevel.

    Works like calculate_parameters, but a sample of dependentTs is held back
    from the training data and validated against the prediction. The signed
    error between prediction and sample is used to calculate the bounds of
    the interval.

    further reading: http://en.wikipedia.org/wiki/Confidence_interval

    :param Timeseries independentTs:    The Timeseries used for the independent variable (x-axis).
        The Timeseries must have at least 2 datapoints with different dates and values
    :param Timeseries dependentTs:    The Timeseries used as the dependent variable (y-axis).
        The Timeseries must have at least 2 datapoints, which dates match with independentTs
    :param float confidenceLevel:    The percentage of entries in the sample that should have
        a prediction error closer or equal to 0 than the bounds of the confidence interval.
    :param float samplePercentage:    How much of the dependentTs should be used for sampling

    :return:    A tuple containing the y-axis intercept and the slope used to execute
        the regression and the (underestimation, overestimation) for the given confidenceLevel
    :rtype:     tuple

    :raise:    Raises an :py:exc:`ValueError` if

        - independentTs and dependentTs have not at least two matching dates
        - independentTs has only one distinct value
        - The dates in one or both Timeseries are not distinct.
    """
    # Split the dependent time series into sample and training data
    sampleY, trainingY = dependentTs.sample(samplePercentage)

    # Element 1 of each match result holds the entries of independentTs;
    # sample first, training second
    matched = [self.match_time_series(part, independentTs)[1] for part in (sampleY, trainingY)]
    sampleX, trainingX = [TimeSeries.from_twodim_list(entries) for entries in matched]

    # The parameters are calculated based on the training data only
    intercept, slope = self.calculate_parameters(trainingX, trainingY)

    # Predict the values for the sample
    prediction = self.predict(sampleX, intercept, slope)

    # Signed error at each location, note that MSD(x,y) != MSD(y,x)
    signedError = MSD()
    signedError.initialize(prediction, sampleY)
    interval = signedError.confidence_interval(confidenceLevel)

    return (intercept, slope, interval)
def validity_of___str___test(self):
    """Test the validity of __str__ for a given TimeSeries."""
    ts = TimeSeries()
    ts.add_entry(0.0, 0.0)
    ts.add_entry(0.1, 0.1)
    ts.add_entry(0.2, 0.2)
    ts.add_entry(0.3, 0.3)
    ts.add_entry(0.4, 0.4)

    # raw string: "\(" and "\[" are invalid escape sequences in a plain string literal
    matchres = re.match(r"TimeSeries\(\[(.*)\]\)", ts.__str__())

    assert matchres is not None
def initialize_test(self):
    """Test if calculate throws an error as expected."""
    entries = [[0.0, 0.0], [1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]]
    original = TimeSeries.from_twodim_list(entries)
    calculated = TimeSeries.from_twodim_list(entries)

    measure = BaseErrorMeasure()

    # initializing the base class with data has to raise NotImplementedError
    notImplemented = False
    try:
        measure.initialize(original, calculated)
    except NotImplementedError:
        notImplemented = True
    assert notImplemented

    # with an empty calculated series initialize has to return a falsy value
    outcome = measure.initialize(original, TimeSeries())
    assert not outcome
def optimize(request):
    """
        Performs Holt Winters Parameter Optimization on the given post data.

        Expects the following values set in the post of the request:
            seasonLength - integer (defaults to 6)
            valuesToForecast - integer (defaults to 0)
            data - JSON encoded two dimensional array of [timestamp, value]
                   (defaults to an empty array)

        Returns a JSON response with the keys 'params', 'original',
        'smoothed' and 'error'.
    """
    # Parse arguments
    seasonLength = int(request.POST.get('seasonLength', 6))
    valuesToForecast = int(request.POST.get('valuesToForecast', 0))
    # The default has to be JSON text: json.loads raises a TypeError for a list
    data = json.loads(request.POST.get('data', '[]'))

    original = TimeSeries.from_twodim_list(data)
    original.normalize("day")  # due to bug in TimeSeries.apply
    original.set_timeformat("%d.%m")

    # optimize smoothing
    hwm = HoltWintersMethod(seasonLength=seasonLength, valuesToForecast=valuesToForecast)
    gridSearch = GridSearch(SMAPE)
    optimal_forecasting, error, optimal_params = gridSearch.optimize(original, [hwm])

    # perform smoothing
    smoothed = optimal_forecasting.execute(original)
    smoothed.set_timeformat("%d.%m")

    result = {
        'params': optimal_params,
        'original': original,
        'smoothed': smoothed,
        'error': round(error.get_error(), 3),
    }
    return Response(json.dumps(result, cls=PycastEncoder), content_type='application/json')
def error_calculation_test(self):
    """Test the calculation of the WeightedMeanAbsolutePercentageError."""
    original = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 8], [7.3, 5], [8, 0], [9, 10]]
    calculated = [[1, 3], [2, 5], [3, 0], [4, 3], [5, 5], [6.1, 6], [7, 3], [7.3, 5], [8, 0], [9, 9]]
    # abs difference:     2      3      3      1      0       NA      5    0         NA      1
    # local errors:     200    150    200     50      0       NA    125    0         NA     20
    # sum:              745

    measure = WeightedMeanAbsolutePercentageError()
    measure.initialize(TimeSeries.from_twodim_list(original), TimeSeries.from_twodim_list(calculated))

    # only the first six characters of the result are compared
    shortened = str(measure.get_error())[:6]
    assert shortened == "93.125"
def normalization_illegal_parameter_test(self):
    """Test illegal parameter of TimeSeries.normalize()."""
    series = TimeSeries.from_twodim_list([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [5.0, 5.0]])

    # each keyword is set to an illegal value on its own and has to be rejected
    illegalCalls = (
        {"normalizationLevel": "ILLEGAL_PARAMETER"},
        {"fusionMethod": "ILLEGAL_PARAMETER"},
        {"interpolationMethod": "ILLEGAL_PARAMETER"},
    )

    for arguments in illegalCalls:
        try:
            series.normalize(**arguments)
        except ValueError:
            continue
        assert False    # pragma: no cover