def execute(self, timeSeries):
    """Creates a new TimeSeries containing the SMA values for the predefined windowsize.

    Every window of ``windowsize`` consecutive entries yields one result
    entry: its value is the sum of the window's values divided by
    ``windowsize`` and its timestamp is taken from the window entry at
    index ``windowsize // 2``.

    :param TimeSeries timeSeries:    The TimeSeries used to calculate the simple moving average values.

    :return:    TimeSeries object containing the simple moving average.
    :rtype:     TimeSeries

    :raise:    Raises a :py:exc:`ValueError` if the defined windowsize is larger than the number
        of elements in timeSeries

    :note:    This implementation aims to support independent for loop execution.
    """
    windowsize = self._parameters["windowsize"]
    tsLength = len(timeSeries)

    if tsLength < windowsize:
        raise ValueError("windowsize is larger than the number of elements in timeSeries.")

    # Number of complete windows that fit into the series.
    nbrOfLoopRuns = tsLength - windowsize + 1

    res = TimeSeries()
    # ``range`` exists on Python 2 and Python 3 alike.
    for idx in range(nbrOfLoopRuns):
        data = timeSeries[idx:idx + windowsize]

        # The entry in the middle of the window provides the timestamp.
        timestamp = data[windowsize // 2][0]

        # NOTE(review): on Python 2 without ``from __future__ import division``
        # this division truncates when every value is an integer -- confirm
        # whether the module enables true division.
        value = sum(entry[1] for entry in data) / windowsize

        res.add_entry(timestamp, value)

    res.sort_timeseries()
    return res
def train_target(self, data_list, model_list):
    """Pick the best forecasting method for the given data via grid search.

    :param data_list:    Sequence of ``[date, value]`` entries that are copied
        into a TimeSeries created with ``isNormalized=True``.
    :param model_list:    Forecasting methods handed to ``GridSearch.optimize``.

    :return:    The first of the three values returned by
        ``GridSearch.optimize`` (the optimal forecasting method).
    """
    series = TimeSeries(isNormalized=True)
    for entry in data_list:
        series.add_entry(entry[0], entry[1])

    optimizer = GridSearch(SMAPE)

    # optimize() hands back (method, error, parameters); only the method is needed.
    bestMethod, _error, _bestParams = optimizer.optimize(series, model_list)
    return bestMethod
def is_normalized_test(self):
    """Test TimeSeries.is_normalized()."""
    # A series flagged as normalized at construction has to report that.
    preNormalized = TimeSeries(isNormalized=True)
    assert preNormalized.is_normalized()

    # An empty series is expected to report normalized even without the flag.
    ts = TimeSeries(isNormalized=False)
    assert ts.is_normalized()

    # After adding entries at these timestamps the series must no longer
    # report itself as normalized.
    for timestamp in (0.1, 0.4, 0.3):
        ts.add_entry(timestamp, 3.2)

    normalized = ts.is_normalized()
    assert False == normalized
def list_initialization_test(self):
    """Test TimeSeries initialization from a given list."""
    data = [
        [0.0, 0.0],
        [0.1, 0.1],
        [0.2, 0.2],
        [0.3, 0.3],
        [0.4, 0.4],
        [0.5, 0.5],
    ]

    # Reference series, filled one entry at a time.
    tsOne = TimeSeries()
    for timestamp, value in data:
        tsOne.add_entry(timestamp, value)

    # Series under test, built from the complete list in one call.
    tsTwo = TimeSeries.from_twodim_list(data)

    sameLength = len(tsOne) == len(tsTwo)
    if not sameLength:
        raise AssertionError

    # Deliberately ``not (a == b)``: only TimeSeries equality is exercised.
    sameContent = tsOne == tsTwo
    if not sameContent:
        raise AssertionError
def error_calculation_test(self):
    """Test the calculation of the MeanSquaredError."""
    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    for idx in range(len(self.dataOrg)):
        tsOrg.add_entry(float(idx), self.dataOrg[idx])
        tsCalc.add_entry(float(idx), self.dataCalc[idx])

    mse = MeanSquaredError()
    mse.initialize(tsOrg, tsCalc)

    # Only the first six characters of the string form are compared.
    self.assertEqual("0.1472", str(mse.get_error())[:6])
def error_calculation_test(self):
    """Test the calculation of the MeanSquaredError."""
    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    # ``range`` is used because it exists on Python 2 and Python 3 alike.
    for idx in range(len(self.dataOrg)):
        tsOrg.add_entry(float(idx), self.dataOrg[idx])
        tsCalc.add_entry(float(idx), self.dataCalc[idx])

    mse = MeanSquaredError()
    mse.initialize(tsOrg, tsCalc)

    # Only the first six characters of the string form are compared.
    self.assertEqual("0.1472", str(mse.get_error())[:6])
def error_calculation_test(self):
    """Test the calculation of the MeanAbsolutePercentageError."""
    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    for idx in range(len(self.dataOrg)):
        tsOrg.add_entry(float(idx), self.dataOrg[idx])
        tsCalc.add_entry(float(idx), self.dataCalc[idx])

    mape = MeanAbsolutePercentageError()
    mape.initialize(tsOrg, tsCalc)

    # Only the first six characters of the string form are compared.
    self.assertEqual("139.78", str(mape.get_error())[:6])
def error_calculation_test(self):
    """Test the calculation of the MeanAbsolutePercentageError."""
    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    # ``range`` is used because it exists on Python 2 and Python 3 alike.
    for idx in range(len(self.dataOrg)):
        tsOrg.add_entry(float(idx), self.dataOrg[idx])
        tsCalc.add_entry(float(idx), self.dataCalc[idx])

    mape = MeanAbsolutePercentageError()
    mape.initialize(tsOrg, tsCalc)

    # Only the first six characters of the string form are compared.
    self.assertEqual("139.78", str(mape.get_error())[:6])
def error_calculation_test(self):
    """Test the calculation of the MeanSignedDifferenceError."""
    msd = MeanSignedDifferenceError()

    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    # ``range`` is used because it exists on Python 2 and Python 3 alike.
    for idx in range(len(self.dataOrg)):
        tsOrg.add_entry(float(idx), self.dataOrg[idx])
        tsCalc.add_entry(float(idx), self.dataCalc[idx])

    msd.initialize(tsOrg, tsCalc)

    # Only the first six characters of the string form are compared.
    self.assertEqual(str(msd.get_error())[:6], '0.0727')
def error_calculation_test(self):
    """Test error calculation for MedianAbsolutePercentageError"""
    mdape = MedianAbsolutePercentageError()

    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Fill both series with the fixture values, using the position as timestamp.
    for position, orgValue in enumerate(self.dataOrg):
        timestamp = float(position)
        tsOrg.add_entry(timestamp, orgValue)
        tsCalc.add_entry(timestamp, self.dataCalc[position])

    mdape.initialize(tsOrg, tsCalc)

    overallError = mdape.get_error()
    self.assertEqual(overallError, 100)

    # presumably the arguments restrict the evaluated range of the data --
    # verify against get_error's signature
    partialError = mdape.get_error(20.0, 50.0)
    self.assertEqual(partialError, 50)
def error_calculation_test(self):
    """Test error calculation for MedianAbsolutePercentageError"""
    mdape = MedianAbsolutePercentageError()

    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    # ``range`` is used because it exists on Python 2 and Python 3 alike.
    for idx in range(len(self.dataOrg)):
        tsOrg.add_entry(float(idx), self.dataOrg[idx])
        tsCalc.add_entry(float(idx), self.dataCalc[idx])

    mdape.initialize(tsOrg, tsCalc)

    self.assertEqual(mdape.get_error(), 100)
    # presumably the arguments restrict the evaluated range of the data --
    # verify against get_error's signature
    self.assertEqual(mdape.get_error(20.0, 50.0), 50)
def error_calculation_test(self):
    """Test the calculation of the Mean Absolute Deviation Error."""
    # Data formerly defined inline (presumably what the fixture attributes
    # self.dataOrg / self.dataCalc hold now -- verify):
    #dataPtsOrg  = [2.30, .373, .583, 1.88, 1.44, -0.0852, -.341, .619, .131, 1.27, 0]
    #dataPtsCalc = [-1.21, -.445, .466, .226, -.694, -.575, 2.73, -1.49, -1.45, -.193, 0]

    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    # ``range`` is used because it exists on Python 2 and Python 3 alike.
    for idx in range(len(self.dataOrg)):
        tsOrg.add_entry(float(idx), self.dataOrg[idx])
        tsCalc.add_entry(float(idx), self.dataCalc[idx])

    mad = MeanAbsoluteDeviationError()
    mad.initialize(tsOrg, tsCalc)

    # compare the strings due to accuracy
    self.assertEqual("0.3454", str(mad.get_error())[:6])
def error_calculation_test(self):
    """Test the calculation of the SymmetricMeanAbsolutePercentageError."""
    dataPtsOrg = [2.30, .373, .583, 1.88, 1.44, -0.0852, -.341, .619, .131, 1.27, 0]
    dataPtsCalc = [-1.21, -.445, .466, .226, -.694, -.575, 2.73, -1.49, -1.45, -.193, 0]

    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    # ``range`` is used because it exists on Python 2 and Python 3 alike.
    for idx in range(len(dataPtsOrg)):
        tsOrg.add_entry(float(idx), dataPtsOrg[idx])
        tsCalc.add_entry(float(idx), dataPtsCalc[idx])

    smape = SymmetricMeanAbsolutePercentageError()
    smape.initialize(tsOrg, tsCalc)

    ## compare the strings due to accuracy
    assert "1.5706" == str(smape.get_error())[:6]
def error_calculation_test(self):
    """Test the calculation of the SymmetricMeanAbsolutePercentageError."""
    dataPtsOrg = [
        2.30, .373, .583, 1.88, 1.44, -0.0852, -.341, .619, .131, 1.27, 0
    ]
    dataPtsCalc = [
        -1.21, -.445, .466, .226, -.694, -.575, 2.73, -1.49, -1.45, -.193, 0
    ]

    tsOrg = TimeSeries()
    tsCalc = TimeSeries()

    # Both series share the timestamps 0.0, 1.0, 2.0, ...
    # ``range`` is used because it exists on Python 2 and Python 3 alike.
    for idx in range(len(dataPtsOrg)):
        tsOrg.add_entry(float(idx), dataPtsOrg[idx])
        tsCalc.add_entry(float(idx), dataPtsCalc[idx])

    smape = SymmetricMeanAbsolutePercentageError()
    smape.initialize(tsOrg, tsCalc)

    ## compare the strings due to accuracy
    assert "1.5706" == str(smape.get_error())[:6]
def check_for_consistency_test(self):
    """Tests if database initialization and manual initialization create equal TimeSeries instances."""
    ## number of rows stored in the table
    countCursor = self._db.cursor().execute("""SELECT COUNT(*) from TestTable""")
    nbrOfTuples = countCursor.fetchall()[0][0]

    ## statement used by both initialization paths
    sqlstmt = """SELECT timestamp, value FROM TestTable ORDER BY timestamp ASC"""

    ## manual path: every fetched row is added as one entry
    tsManual = TimeSeries()
    for row in self._db.cursor().execute(sqlstmt).fetchall():
        tsManual.add_entry(*row)

    ## automatic path: the TimeSeries consumes the cursor itself
    tsAuto = TimeSeries()
    tsAuto.initialize_from_sql_cursor(self._db.cursor().execute(sqlstmt))

    ## both series have to hold exactly the rows of the table ...
    manualLength = len(tsManual)
    autoLength = len(tsAuto)
    lengthPairs = (
        (nbrOfTuples, manualLength),
        (nbrOfTuples, autoLength),
        (manualLength, autoLength),
    )
    for expected, actual in lengthPairs:
        if not (expected == actual):
            raise AssertionError

    ## ... and have to compare equal
    if not (tsManual == tsAuto):
        raise AssertionError
def check_for_consistency_test(self):
    """Tests if database initialization and manual initialization create equal TimeSeries instances."""
    # number of rows stored in the table
    countCursor = self._db.cursor().execute("""SELECT COUNT(*) from TestTable""")
    nbrOfTuples = countCursor.fetchall()[0][0]

    # statement used by both initialization paths
    sqlstmt = """SELECT timestamp, value FROM TestTable ORDER BY timestamp ASC"""

    # manual path: the timestamp column is passed on as a string
    tsManual = TimeSeries()
    for row in self._db.cursor().execute(sqlstmt).fetchall():
        timestamp = str(row[0])
        tsManual.add_entry(timestamp, row[1])

    # automatic path: the TimeSeries consumes the cursor itself
    tsAuto = TimeSeries()
    tsAuto.initialize_from_sql_cursor(self._db.cursor().execute(sqlstmt))

    # both series have to hold exactly the rows of the table ...
    manualLength = len(tsManual)
    autoLength = len(tsAuto)
    assert nbrOfTuples == manualLength
    assert nbrOfTuples == autoLength
    assert manualLength == autoLength

    # ... and have to compare equal
    assert tsManual == tsAuto
def validity_of___str___test(self):
    """Test the validity of __str__ for a given TimeSeries."""
    ts = TimeSeries()
    ts.add_entry(0.0, 0.0)
    ts.add_entry(0.1, 0.1)
    ts.add_entry(0.2, 0.2)
    ts.add_entry(0.3, 0.3)
    ts.add_entry(0.4, 0.4)

    # Raw string: the backslashes are regex escapes, not string escapes.
    # The string form has to start with ``TimeSeries([`` and contain ``])``.
    matchres = re.match(r"TimeSeries\(\[(.*)\]\)", ts.__str__())

    assert matchres is not None
def json_serialization_formatfree_test(self):
    """Test the json serialization without predefined format."""
    tsOrg = TimeSeries()
    # Timestamp and value are identical for every entry.
    for number in (0.0, 0.1, 0.2, 0.3, 0.4):
        tsOrg.add_entry(number, number)

    # Round trip: serialize, then rebuild a series from the result.
    serialized = tsOrg.to_json()
    tsNew = TimeSeries.from_json(serialized)

    sameLength = len(tsOrg) == len(tsNew)
    if not sameLength:
        raise AssertionError

    sameContent = tsOrg == tsNew
    if not sameContent:
        raise AssertionError
def json_serialization_format_test(self):
    """Test the json serialization with predefined format."""
    timeFormat = "%Y-%m-%d_%H:%M:%S"
    entries = (
        (0.0, 0.0),
        (1.0, 0.1),
        (2.0, 0.2),
        (3.0, 0.3),
        (4.0, 0.4),
    )

    tsOrg = TimeSeries()
    for timestamp, value in entries:
        tsOrg.add_entry(timestamp, value)

    # The format is set after the entries were added and before serializing.
    tsOrg.set_timeformat(timeFormat)

    # Round trip: serialize, then rebuild a series using the same format.
    serialized = tsOrg.to_json()
    tsNew = TimeSeries.from_json(serialized, format=timeFormat)

    sameLength = len(tsOrg) == len(tsNew)
    if not sameLength:
        raise AssertionError

    sameContent = tsOrg == tsNew
    if not sameContent:
        raise AssertionError
def method_test(self):
    """Test if TimeSeries apply branches work correctly.

    This is mainly to increase code coverage.
    """
    # One BaseMethod per (hasToBeSorted, hasToBeNormalized) combination.
    flagCombinations = (
        (True, True),
        (False, True),
        (True, False),
        (False, False),
    )
    methods = [
        BaseMethod([], hasToBeSorted=sortedFlag, hasToBeNormalized=normalizedFlag)
        for sortedFlag, normalizedFlag in flagCombinations
    ]

    ts = TimeSeries(isNormalized=True)
    # Timestamp and value are identical for every entry.
    for number in (0.0, 0.1, 0.2, 0.3, 0.4):
        ts.add_entry(number, number)

    # Applying any of the methods is expected to raise NotImplementedError.
    for method in methods:
        try:
            ts.apply(method)
        except NotImplementedError:
            pass
        else:
            assert False    # pragma: no cover
from pycast.optimization import GridSearch
from pycast.errors import SymmetricMeanAbsolutePercentageError as SMAPE

# Reads MC1001.csv series by series; for every series one further line of
# MC1001_season_indices.csv is consumed, so both files are walked in lockstep.
with open('MC1001_season_indices.csv', 'r') as season_indices:
    season_indices.readline()  # header

    with open('MC1001.csv', 'r') as data_file:
        counter = 1

        # [1:] skips the header line of the data file.
        for series_line in data_file.readlines()[1:]:
            # series line has format:
            # Series,N Obs,Seasonality,NF,Type,Starting date,Category,1,2,3 ...
            series_tuples = series_line.split(',')
            number_obeservations = int(series_tuples[1])
            number_forecast = int(series_tuples[3])

            # Observed values; converted to float like the forecast values
            # below instead of being stored as raw CSV strings.
            orig = TimeSeries(isNormalized=True)
            for i in range(number_obeservations):
                # offset to first data entry in line
                orig.add_entry(i, float(series_tuples[i + 7]))

            # The columns following the observations hold the values the
            # forecast is to be checked against.
            forecast_check = TimeSeries(isNormalized=True)
            for j in range(number_obeservations, number_obeservations + number_forecast):
                forecast_check.add_entry(j, float(series_tuples[j + 7]))

            # Season indices are given in season file: the second column
            # holds the season length, the indices start in the third column.
            season_indices_tuple = season_indices.readline().split(',')
            seasonLength = int(season_indices_tuple[1])
            season_values = []
            for i in range(seasonLength):
                season_values.append(float(season_indices_tuple[i + 2]))
from pycast.errors import SymmetricMeanAbsolutePercentageError as SMAPE with open('MC1001_season_indices.csv', 'r') as season_indices: season_indices.readline() #header with open('MC1001.csv', 'r') as data_file: counter = 1 for series_line in data_file.readlines()[1:]: #series line has format: Series,N Obs,Seasonality,NF,Type,Starting date,Category,1,2,3 ... series_tuples = series_line.split(',') number_obeservations = int(series_tuples[1]) number_forecast = int(series_tuples[3]) orig = TimeSeries(isNormalized=True) for i in range(number_obeservations): orig.add_entry( i, series_tuples[i + 7]) #offset to first data entry in line #print orig forecast_check = TimeSeries(isNormalized=True) for j in range(number_obeservations, number_obeservations + number_forecast): forecast_check.add_entry(j, float(series_tuples[j + 7])) #print forecast_check #Season indices are given in season file season_indices_tuple = season_indices.readline().split(',') seasonLength = int(season_indices_tuple[1]) season_values = [] for i in range(seasonLength):
__author__ = 'wangchao'

import sys
# The bundled pycast checkout has to be on the path before it is imported.
sys.path.append('./pycast-master/')

import pycast
from pycast.common.timeseries import TimeSeries
from pycast.methods import HoltWintersMethod
from pycast.optimization import GridSearch
from pycast.errors import SymmetricMeanAbsolutePercentageError as SMAPE

data_list = [1.1, 2.1, 3.3, 4.4, 5.5, 1.1, 2.2, 3.3]

# The position of a value within data_list serves as its timestamp.
orig = TimeSeries(isNormalized=True)
for i, observation in enumerate(data_list):
    orig.add_entry(i, observation)

# One season index of 1.0 per position of the season.
seasonLength = 5
season_values = [1.0] * seasonLength

hwm = HoltWintersMethod(seasonLength=seasonLength, valuesToForecast=3)
hwm.set_parameter("seasonValues", season_values)

# Grid search over the method's parameters, rated by SMAPE; the optimized
# method is then run on the original series.
gridSearch = GridSearch(SMAPE)
optimal_forecasting, error, optimal_params = gridSearch.optimize(orig, [hwm])
predicted = optimal_forecasting.execute(orig)