def predict(
        self,
        targetSeries,
        exogenousSeries=None,
):
    """
    Run the model over the provided input data and return its forecasts

    :param targetSeries: Series of the Target Variable, it should be a numpy
    array of shape (n, numTargetVariables)
    :param exogenousSeries: Series of exogenous Variables, it should be a
    numpy array of shape (n, numExoVariables), it can be None only if
    numExoVariables is 0 in which case the exogenous variables are not
    considered
    :return: Forecast targets predicted by the model (the batch axis that is
    added for the model call is squeezed out again), the horizon of the
    targets is the same as self.forecastHorizon
    """

    logger = GlobalLogger.getLogger()
    methodName = self.predict.__name__

    logger.log(f'Target Series Shape: {targetSeries.shape}', 2, methodName)
    if exogenousSeries is not None:
        logger.log(
            f'Exogenous Series Shape: {exogenousSeries.shape}', 2, methodName
        )

    logger.log('Prepare Data', 1, methodName)

    # Shapes must agree with what the model was configured for
    assert targetSeries.shape[1] == self.numTargetVariables
    assert Utility.isExoShapeValid(exogenousSeries, self.numExoVariables)

    features = Utility.prepareDataPred(targetSeries, exogenousSeries)

    logger.log('Begin Prediction', 1, methodName)

    # The model is fed a batch holding this single sequence
    batchedFeatures = np.expand_dims(features, axis=0)
    batchedForecast = self.model.predict(batchedFeatures, verbose=0)
    return tf.squeeze(batchedForecast, axis=0)
def predict(self, targetSeries, exogenousSeries=None):
    """
    Forecast every timestep of the provided input data, one at a time

    :param targetSeries: Univariate Series of the Target Variable, it should
    be a numpy array of shape (n,)
    :param exogenousSeries: Series of exogenous Variables, it should be a
    numpy array of shape (n, numExoVariables), it can be None only if
    numExoVariables is 0 in which case the exogenous variables are not
    considered
    :return: Numpy array with one forecast per timestep, the horizon of the
    targets is the same as self.forecastHorizon
    """

    logger = GlobalLogger.getLogger()
    logger.log('Begin Prediction', 1, self.predict.__name__)

    assert Utility.isExoShapeValid(exogenousSeries, self.inputDimension - 1)
    features = Utility.prepareDataPred(targetSeries, exogenousSeries)

    # One call to predictTimestep per row of the feature matrix, in order
    forecasts = np.array([
        self.predictTimestep(features, timestep)
        for timestep in range(features.shape[0])
    ])

    logger.log(f'Output Shape: {forecasts.shape}', 2, self.predict.__name__)
    return forecasts
def test_trainTestSplit(data, train, val):
    """
    Tests Utility.trainTestSplit

    :param data: Data to split along its first axis
    :param train: Train size, values below 1.0 are treated as a fraction of
    the number of rows in data
    :param val: Validation size (same convention as train), or None when no
    validation set is requested
    """

    numRows = data.shape[0]
    numTrain = round(numRows * train) if train < 1.0 else train

    if val is not None:
        # Three-way split; val is handed over exactly as it was given
        dataTrain, dataVal, dataTest = \
            Utility.trainTestSplit(data, numTrain, val)

        # The three parts joined together must reproduce the data
        rejoined = np.concatenate((dataTrain, dataVal, dataTest), axis=0)
        assert np.array_equal(rejoined, data)

        numVal = round(numRows * val) if val < 1.0 else val

        # Train and val parts must have the requested number of elements
        assert dataTrain.shape[0] == numTrain
        assert dataVal.shape[0] == numVal
    else:
        # Two-way split, no validation set
        dataTrain, dataTest = Utility.trainTestSplit(data, numTrain, None)

        # The two parts joined together must reproduce the data
        rejoined = np.concatenate((dataTrain, dataTest), axis=0)
        assert np.array_equal(rejoined, data)

        # Train part must have the requested number of elements
        assert dataTrain.shape[0] == numTrain
def main():
    """
    Trains an LstmForecast model on generated 'long_term' data, plots the
    training loss and then plots predictions against true values for a few
    randomly chosen train sequences and for the test data
    """

    n = 21500
    trainN = 21000
    seqLength = 500
    numSeqPlot = 5

    generatedSeries = StandardGenerator('long_term').generate(n)
    trainData, testData = Utility.trainTestSplit(generatedSeries, trainN)
    trainSequences = Utility.breakSeq(trainData, seqLength=seqLength)

    # for i in range(numSeqPlot):
    #     Plot.plotDataCols(trainSequences[
    #         np.random.randint(0, len(trainSequences))
    #     ])

    model = LstmForecast(
        forecastHorizon=1,
        stateSize=50,
        activation='tanh',
        numRnnLayers=3
    )
    model.model.summary()

    learningRate = tf.keras.optimizers.schedules.ExponentialDecay(
        0.01, 20, 0.96
    )
    loss = model.train(
        trainSequences=trainSequences,
        numIterations=15,
        optimizer=tf.keras.optimizers.Adam(learning_rate=learningRate)
    )
    Plot.plotLoss(loss)

    # Predictions on randomly picked train sequences
    for _ in range(numSeqPlot):
        idx = np.random.randint(0, len(trainSequences))
        sequence = trainSequences[idx]
        _, Ypred = model.evaluate(sequence, returnPred=True)
        Plot.plotPredTrue(Ypred, sequence[1:], 'On Train')

    # Predictions on the held-out test data
    _, Ypred = model.evaluate(testData, returnPred=True)
    Plot.plotPredTrue(Ypred, testData[1:], 'On Test')
def evaluate(
        self,
        targetSeries,
        exogenousSeries=None,
        returnPred=False
):
    """
    Forecast using the model parameters on the provided data, evaluates
    the forecast result using the loss and returns it

    :param targetSeries: Series of the Target Variable, it should be a numpy
    array of shape (numTimesteps + self.forecastHorizon, numTargetVariables).
    numTimesteps is the number of timesteps on which our model must predict,
    the values ahead are for evaluating the predicted results with respect
    to them (i.e. they are true targets for our prediction)
    :param exogenousSeries: Series of exogenous Variables, it should be a
    numpy array of shape (numTimesteps, numExoVariables), it can be None
    only if numExoVariables is 0 in which case the exogenous variables are
    not considered
    :param returnPred: If True, then return predictions along with loss,
    else return only loss
    :return: If returnPred is True, then a tuple (loss, predictions) where
    loss is the mean squared error between the true and predicted targets,
    else only the loss
    """

    logger = GlobalLogger.getLogger()

    logger.log(f'Target Series Shape: {targetSeries.shape}', 2,
               self.evaluate.__name__)
    if exogenousSeries is not None:
        logger.log(
            f'Exogenous Series Shape: {exogenousSeries.shape}', 2,
            self.evaluate.__name__
        )

    logger.log('Prepare Data', 1, self.evaluate.__name__)

    assert targetSeries.shape[1] == self.numTargetVariables
    assert Utility.isExoShapeValid(exogenousSeries, self.numExoVariables)

    X, Ytrue = Utility.prepareDataTrain(targetSeries, exogenousSeries,
                                        self.forecastHorizon)

    # This message belongs to evaluate; it used to be tagged with the name
    # of predict, which mis-attributed the log entry
    logger.log('Begin Evaluation', 1, self.evaluate.__name__)

    # The model is fed a batch holding this single sequence, the batch axis
    # is removed again from the output
    Ypred = tf.squeeze(
        self.model.predict(np.expand_dims(X, axis=0), verbose=0),
        axis=0
    )

    loss = tf.keras.losses.MeanSquaredError()(
        Ytrue,
        Ypred
    )

    if returnPred:
        return loss, Ypred
    else:
        return loss
def main():
    """
    Trains a DeepNN model on generated 'long_term' data, plots the training
    loss, then evaluates on the test data and plots predictions against the
    true values
    """

    n = 20200
    trainN = 20000
    seqLength = 500
    forecastHorizon = 1
    lag = 30

    # The generated series gets a trailing axis so it becomes 2-D
    generatedSeries = StandardGenerator('long_term').generate(n)
    data = np.expand_dims(generatedSeries, axis=1)

    trainData, testData = Utility.trainTestSplit(data, trainN)
    trainSequences = Utility.breakTrainSeq(trainData, None, seqLength)

    model = DeepNN(
        forecastHorizon=forecastHorizon,
        lag=lag,
        numUnitsPerLayer=10,
        numLayers=2,
        numTargetVariables=1,
        numExoVariables=0
    )

    learningRate = tf.keras.optimizers.schedules.ExponentialDecay(
        0.1, 25, 0.97
    )
    loss = model.train(
        trainSequences=trainSequences,
        numIterations=20,
        optimizer=tf.keras.optimizers.Adam(learning_rate=learningRate),
        verboseLevel=2,
        returnLosses=True
    )
    Plot.plotLoss(loss)

    evalLoss, Ypred = model.evaluate(testData, returnPred=True)

    # True values are taken from index lag + forecastHorizon onwards
    Ytrue = testData[lag + forecastHorizon:, :]

    print(f'Eval Loss: {evalLoss}')
    Plot.plotPredTrue(Ypred, Ytrue)
def prepareDataPredDNN(
        targetSeries,
        exogenousSeries,
        lag
):
    """
    Prepare Data For Prediction

    Every feature row is the flattened window of lag + 1 consecutive
    timesteps (the current timestep together with the lag timesteps before
    it) of the combined target and exogenous data.

    :param targetSeries: Multivariate Series of the Target Variable, it
    should be a numpy array of shape (lag + nPred, numTargetVariables)
    :param exogenousSeries: Series of exogenous Variables, it should be a
    numpy array of shape (lag + nPred, numExoVariables), it can be None
    only if numExoVariables is 0 in which case the exogenous variables are
    not considered
    :param lag: The lag to be considered
    :return: Prepared Feature Data X of shape
    (nPred, (lag + 1) * (numTargetVariables + numExoVariables)). If the
    series has no complete window (at most lag timesteps), an empty array
    of shape (0, (lag + 1) * (numTargetVariables + numExoVariables)) is
    returned
    """

    Xtemp = Utility.prepareDataPred(
        targetSeries, exogenousSeries
    )

    # Length of one flattened window, it is the same for every timestep
    vecLen = (lag + 1) * Xtemp.shape[1]

    windows = [
        np.reshape(Xtemp[i - lag: i + 1, :], (vecLen,))
        for i in range(lag, Xtemp.shape[0])
    ]

    # np.array([]) would be 1-D, keep the result 2-D even without windows
    if not windows:
        return np.empty((0, vecLen), dtype=Xtemp.dtype)

    return np.array(windows)
def test_prepareDataPred(targetSeries, exogenousSeries):
    """
    Tests Utility.prepareDataPred

    :param targetSeries: Target series, either 1-D or 2-D
    :param exogenousSeries: Exogenous series with as many rows as the target
    series, or None
    """

    hasExo = exogenousSeries is not None
    if hasExo:
        assert targetSeries.shape[0] == exogenousSeries.shape[0]

    n = targetSeries.shape[0]
    X = Utility.prepareDataPred(targetSeries, exogenousSeries)

    # A 1-D target series counts as a single target variable
    numTargetCols = 1 if len(targetSeries.shape) == 1 else targetSeries.shape[1]
    numExoCols = exogenousSeries.shape[1] if hasExo else 0

    # Shape of features must equal to (n, numTargetCols + numExoCols)
    assert X.shape == (n, numTargetCols + numExoCols)

    for i in range(n):
        expectedRow = targetSeries[i]

        # Elements of a 1-D target series are scalars, wrap them in an array
        if not isinstance(expectedRow, np.ndarray):
            expectedRow = np.array([expectedRow])

        if hasExo:
            expectedRow = np.concatenate(
                (expectedRow, exogenousSeries[i]), axis=0
            )

        # Concatenation of ith target and exo elements must be
        # the ith element of features X
        assert np.array_equal(expectedRow, X[i])
def test_convertToTrainSeq(dataSequences, containsExo, forecastHorizon):
    """
    Tests Utility.convertToTrainSeq

    Walks over the data sequences in order; every data sequence whose
    target is longer than forecastHorizon must line up with the next train
    sequence, all other data sequences must have been dropped.
    """

    trainSequences = \
        Utility.convertToTrainSeq(dataSequences, containsExo, forecastHorizon)

    # Number of train sequences matched so far
    numMatched = 0

    for dataSeq in dataSequences:
        dataTarget = dataSeq[0] if containsExo else dataSeq
        dataLen = dataTarget.shape[0]

        # Sequences that are not longer than the horizon yield nothing
        if dataLen <= forecastHorizon:
            continue

        if containsExo:
            (_, dataExo) = dataSeq
            (trainTarget, trainExo) = trainSequences[numMatched]

            # Target is kept whole, exo loses its last forecastHorizon rows
            assert np.array_equal(trainTarget, dataTarget)
            assert np.array_equal(trainExo,
                                  dataExo[:dataLen - forecastHorizon])
        else:
            trainTarget = trainSequences[numMatched]
            assert np.array_equal(trainTarget, dataTarget)

        numMatched += 1

    # Every train sequence must have been accounted for
    assert numMatched == len(trainSequences)
def __getitem__(self, idx):
    """
    Gets the batch corresponding to the provided index

    :param idx: Index of the batch which is requested
    :return: The (idx)th batch as a tuple (X, Y), prepared from the (idx)th
    train sequence by DeepNN.prepareDataTrainDNN
    """

    # Look the sequence up once instead of indexing repeatedly
    sequence = self.trainSequences[idx]

    # A tuple (including tuple subclasses such as namedtuple) holds the
    # target series followed by the exogenous series, anything else is
    # taken to be the target series alone
    if isinstance(sequence, tuple):
        targetSeries = sequence[0]
        exogenousSeries = sequence[1]
    else:
        targetSeries = sequence
        exogenousSeries = None

    assert (
        len(targetSeries.shape) == 2
        and targetSeries.shape[1] == self.numTargetVariables
    )
    assert Utility.isExoShapeValid(exogenousSeries, self.numExoVariables)

    X, Y = DeepNN.prepareDataTrainDNN(
        targetSeries,
        exogenousSeries,
        self.forecastHorizon,
        self.lag
    )

    return X, Y
def test_prepareDataTrain(targetSeries, exogenousSeries, forecastHorizon):
    """
    Tests Utility.prepareDataTrain

    :param targetSeries: Target series, it must be longer than
    forecastHorizon
    :param exogenousSeries: Exogenous series, forecastHorizon rows shorter
    than the target series, or None
    :param forecastHorizon: Forecast horizon handed to prepareDataTrain
    """

    targetLen = targetSeries.shape[0]

    if exogenousSeries is not None:
        assert targetLen == exogenousSeries.shape[0] + forecastHorizon
    assert targetLen > forecastHorizon

    n = targetLen - forecastHorizon

    X, Y = Utility.prepareDataTrain(targetSeries, exogenousSeries,
                                    forecastHorizon)

    # Shapes of features and targets must agree
    assert n == X.shape[0] == Y.shape[0]

    # The targets must equal to the target series starting at
    # 'forecastHorizon'
    assert np.array_equal(Y, targetSeries[forecastHorizon:])

    # Rebuild the expected features from the first n target rows
    expectedX = targetSeries[:n]
    if len(expectedX.shape) == 1:
        expectedX = np.expand_dims(expectedX, axis=1)
    if exogenousSeries is not None:
        expectedX = np.concatenate((expectedX, exogenousSeries), axis=1)

    # Features must match the concatenation of target series and exo series
    assert np.array_equal(X, expectedX)
def main():
    """
    Tries the model with four ways of building train sequences: mutually
    exclusive chunks, randomly sampled contiguous chunks, the single long
    sequence, and multiple independently generated sequences
    """

    # The data generator
    dataGenerator = StandardGenerator('long_term')

    n = 21500
    trainN = 21000
    seqLength = 500
    numSeq = 42

    # Data for single-sequence methods
    trainData, testData = Utility.trainTestSplit(
        dataGenerator.generate(n),
        train=trainN
    )

    # Method 1 - train on mutually exclusive sequences
    exclusiveSequences = Utility.breakSeq(trainData, seqLength)
    tryModelOneSeq(exclusiveSequences, testData, 'method1', PLOT_DIR)

    # Method 2 - train on randomly sampled contiguous sequences
    startIndices = np.random.randint(
        0, trainN - seqLength, size=(numSeq,)
    )
    sampledSequences = [
        trainData[startIdx: startIdx + seqLength]
        for startIdx in startIndices
    ]
    tryModelOneSeq(sampledSequences, testData, 'method2', PLOT_DIR)

    # Method 3 - train on the single long sequence
    tryModelOneSeq([trainData], testData, 'method3', PLOT_DIR)

    # Multiple Independent Train Sequences, every one of length seqLength
    independentSequences = Utility.generateMultipleSequence(
        dataGenerator=dataGenerator,
        numSequences=numSeq,
        minSequenceLength=seqLength,
        maxSequenceLength=seqLength
    )
    testData = dataGenerator.generate(seqLength)
    tryModelMultiSeq(independentSequences, testData, 'multiseq', PLOT_DIR)
def test_breakSeq(data: np.ndarray, seqLength: int):
    """
    Tests Utility.breakSeq

    :param data: Data which is broken into sequences
    :param seqLength: Requested length of every sequence
    """

    dataSeq = Utility.breakSeq(data, seqLength)

    # On concatenating the dataSeq, we should get back data
    rejoined = np.concatenate(dataSeq, axis=0)
    assert np.array_equal(rejoined, data)

    # length of each seq except the last should be exactly seqLength
    assert all(seq.shape[0] == seqLength for seq in dataSeq[:-1])
def test_generateMultipleSequence(dataGenerator, numSequences,
                                  minSequenceLength, maxSequenceLength):
    """
    Tests Utility.generateMultipleSequence

    The requested number of sequences must come back and the length of
    each one must lie within [minSequenceLength, maxSequenceLength].
    """

    dataSeq = Utility.generateMultipleSequence(
        dataGenerator,
        numSequences,
        minSequenceLength,
        maxSequenceLength
    )

    assert len(dataSeq) == numSequences

    assert all(
        minSequenceLength <= seq.shape[0] <= maxSequenceLength
        for seq in dataSeq
    )
def test_trainTestSplitSeries(targetSeries, exogenousSeries, train, val):
    """
    Tests Utility.trainTestSplitSeries

    :param targetSeries: Target series
    :param exogenousSeries: Exogenous series with as many rows as the target
    series
    :param train: Train size, values below 1.0 are treated as a fraction of
    the number of rows
    :param val: Validation size (same convention as train), or None when no
    validation set is requested
    """

    assert targetSeries.shape[0] == exogenousSeries.shape[0]
    n = targetSeries.shape[0]

    numTrain = round(n * train) if train < 1.0 else train

    if val is not None:
        # Three-way split; val is resolved to a count before the call
        numVal = round(n * val) if val < 1.0 else val

        (targetTrain, exoTrain), (targetVal, exoVal), \
            (targetTest, exoTest) = Utility.trainTestSplitSeries(
                targetSeries, exogenousSeries, numTrain, numVal
            )

        # train, val and test data together should give entire data
        targetJoined = np.concatenate(
            (targetTrain, targetVal, targetTest), axis=0
        )
        assert np.array_equal(targetJoined, targetSeries)
        exoJoined = np.concatenate((exoTrain, exoVal, exoTest), axis=0)
        assert np.array_equal(exoJoined, exogenousSeries)

        assert targetTrain.shape[0] == exoTrain.shape[0] == numTrain
        assert targetVal.shape[0] == exoVal.shape[0] == numVal
    else:
        # Two-way split, no validation set
        (targetTrain, exoTrain), (targetTest, exoTest) = \
            Utility.trainTestSplitSeries(
                targetSeries, exogenousSeries, numTrain, None
            )

        # train and test data together should give entire data
        targetJoined = np.concatenate((targetTrain, targetTest), axis=0)
        assert np.array_equal(targetJoined, targetSeries)
        exoJoined = np.concatenate((exoTrain, exoTest), axis=0)
        assert np.array_equal(exoJoined, exogenousSeries)

        assert targetTrain.shape[0] == exoTrain.shape[0] == numTrain
def test_breakTrainSeq(targetSeries, exogenousSeries, seqLength,
                       forecastHorizon):
    """
    Tests Utility.breakTrainSeq

    Without an exogenous series the pieces must simply partition the target
    series. With one, each piece is a (target, exo) pair in which the target
    part is forecastHorizon rows longer than the exo part, and the next
    piece starts where the exo part of the previous one ended.
    """

    n = targetSeries.shape[0]
    trainSequences = Utility.breakTrainSeq(targetSeries, exogenousSeries,
                                           seqLength, forecastHorizon)

    # If exogenousSeries is none, breakTrainSeq behaves differently,
    # here we test that behaviour
    if exogenousSeries is None:
        # On concatenating the trainSequences, we should get back data
        rejoined = np.concatenate(trainSequences, axis=0)
        assert np.array_equal(rejoined, targetSeries)

        # length of each seq except the last should be exactly seqLength
        assert all(seq.shape[0] == seqLength for seq in trainSequences[:-1])
        return

    # Forecast horizon cannot be None
    assert forecastHorizon is not None

    # Target and Exogenous series must have same number of elements
    assert targetSeries.shape[0] == exogenousSeries.shape[0]

    # Check if the train sequences are correct
    cursor = 0
    for (targetPart, exoPart) in trainSequences:
        exoLen = exoPart.shape[0]
        assert targetPart.shape[0] == exoLen + forecastHorizon

        exoStop = cursor + exoLen
        targetStop = exoStop + forecastHorizon

        # Check if the broken sequence matches the correct part of the
        # original sequence
        assert np.array_equal(targetPart, targetSeries[cursor:targetStop])
        assert np.array_equal(exoPart, exogenousSeries[cursor:exoStop])

        cursor = exoStop

    # At most forecastHorizon elements may be left over at the end
    assert (cursor + forecastHorizon == n) \
        or (n - cursor <= forecastHorizon)
def checkShapeValid(self, targetSeries, exogenousSeries):
    """
    Checks if shape of the target series and exogenous series is valid

    :param targetSeries: The target series
    :param exogenousSeries: The exogenous series
    :return: False if the target series is not 2-dimensional or its second
    dimension differs from numTargetVariables, otherwise the result of
    Utility.isExoShapeValid for the exogenous series and numExoVariables
    """

    # Target series must be 2-dimensional
    if len(targetSeries.shape) != 2:
        return False

    # and must have one column per target variable
    if targetSeries.shape[1] != self.numTargetVariables:
        return False

    # The exogenous series is only inspected once the target series passed
    return Utility.isExoShapeValid(exogenousSeries, self.numExoVariables)
def main():
    """
    Generates 'long_term' data, splits it into train and test parts and
    runs the extreme model experiments on it
    """

    n = 21500
    trainN = 21000
    seqLength = 500

    # The data generator
    dataGenerator = StandardGenerator('long_term')

    # Generated Data
    generatedSeries = dataGenerator.generate(n)
    trainData, testData = Utility.trainTestSplit(
        generatedSeries,
        train=trainN
    )

    # Extreme Model 1
    tryExtreme1(trainData, testData, seqLength, 'extreme1', PLOT_DIR)

    # Extreme Model 2
    # NOTE(review): this step is labelled 'extreme2' but calls tryExtreme1
    # again - possibly a copy-paste slip, confirm whether a separate
    # routine for the second model was intended
    tryExtreme1(trainData, testData, seqLength, 'extreme2', PLOT_DIR)
def main():
    """
    Trains an ExtremeTime2 model on generated 'long_term' data, plots the
    training loss, then evaluates on a few randomly placed windows of the
    train data and on the test data, printing the loss and plotting
    predictions against true values each time
    """

    n = 21500
    trainN = 21000
    seqLength = 500
    numSeqPlot = 5

    generatedSeries = StandardGenerator('long_term').generate(n)
    trainData, testData = Utility.trainTestSplit(generatedSeries, trainN)

    model = ExtremeTime2(
        forecastHorizon=1,
        memorySize=20,
        windowSize=10,
        embeddingSize=10,
        contextSize=10
    )

    learningRate = tf.keras.optimizers.schedules.ExponentialDecay(
        0.01, 50, 0.99
    )
    loss = model.train(
        targetSeries=trainData,
        sequenceLength=seqLength,
        numIterations=10,
        optimizer=tf.keras.optimizers.Adam(learning_rate=learningRate),
        verboseLevel=1,
        returnLosses=True
    )
    Plot.plotLoss(loss)

    # Evaluate on randomly placed windows of the train data
    for _ in range(numSeqPlot):
        windowStart = np.random.randint(0, trainN - seqLength)
        window = trainData[windowStart:windowStart + seqLength]

        evalLoss, Ypred = model.evaluate(window, returnPred=True)

        print(f'Train Eval Loss: {evalLoss}')
        Plot.plotPredTrue(Ypred, window[1:], 'On Train')

    # Evaluate on the held-out test data
    evalLoss, Ypred = model.evaluate(testData, returnPred=True)

    print(f'Test Eval Loss: {evalLoss}')
    Plot.plotPredTrue(Ypred, testData[1:], 'On Test')
def prepareDataTrainDNN(
        targetSeries,
        exogenousSeries,
        forecastHorizon,
        lag
):
    """
    Prepare Data For Training

    Every feature row is the flattened window of lag + 1 consecutive
    timesteps (the current timestep together with the lag timesteps before
    it) of the features given by Utility.prepareDataTrain. The targets are
    the targets given by Utility.prepareDataTrain from index lag onwards,
    so that feature row j and target row j belong together.

    :param targetSeries: Multivariate Series of the Target Variable, it
    should be a numpy array of shape
    (lag + nTrain + forecastHorizon, numTargetVariables)
    :param exogenousSeries: Series of exogenous Variables, it should be a
    numpy array of shape (lag + nTrain, numExoVariables), it can be None
    only if numExoVariables is 0 in which case the exogenous variables are
    not considered
    :param forecastHorizon: How much further in the future the model has to
    predict the target series variable
    :param lag: The lag to be considered
    :return: Prepared training data X of shape
    (nTrain, (lag + 1) * (numTargetVariables + numExoVariables)),
    Y of shape (nTrain, numTargetVariables). If there is no complete window
    (at most lag feature timesteps), X is an empty array of shape
    (0, (lag + 1) * (numTargetVariables + numExoVariables))
    """

    Xtemp, Ytemp = Utility.prepareDataTrain(
        targetSeries, exogenousSeries, forecastHorizon
    )

    # The first lag timesteps have no complete window, drop their targets
    Y = Ytemp[lag:]

    # Length of one flattened window, it is the same for every timestep
    vecLen = (lag + 1) * Xtemp.shape[1]

    windows = [
        np.reshape(Xtemp[i - lag: i + 1, :], (vecLen,))
        for i in range(lag, Xtemp.shape[0])
    ]

    # np.array([]) would be 1-D, keep the features 2-D even without windows
    if windows:
        X = np.array(windows)
    else:
        X = np.empty((0, vecLen), dtype=Xtemp.dtype)

    return X, Y
def train(
        self,
        targetSeries,
        sequenceLength,
        exogenousSeries=None,
        numIterations=1,
        optimizer=None,
        modelSavePath=None,
        verboseLevel=1,
        returnLosses=True
):
    """
    Train the Model Parameters on the provided data

    In every iteration the prepared data is walked through in consecutive
    sequences of sequenceLength timesteps, starting at timestep
    self.windowSize, and self.trainSequence is called on each of them.
    After the last iteration self.buildMemory is called on the prepared
    features.

    :param targetSeries: Univariate Series of the Target Variable, it
    should be a numpy array of shape (n + self.forecastHorizon,)
    :param sequenceLength: Length of each training sequence, must be
    positive
    :param exogenousSeries: Series of exogenous Variables, it should be a
    numpy array of shape (n, numExoVariables), it can be None only if
    numExoVariables is 0 in which case the exogenous variables are not
    considered
    :param numIterations: Number of iterations of training to be performed
    :param optimizer: Optimizer of training the parameters, if None then a
    new tf.optimizers.Adam() is created for this call
    :param modelSavePath: Path where to save the model parameters after
    each iteration, if None then parameters are not saved
    :param verboseLevel: Verbose level, 0 is nothing, greater values
    increases the information printed to the console
    :param returnLosses: If True, then losses are returned, else None is
    returned
    :return: If returnLosses is True, then numpy array of the average loss
    of each iteration, of shape (numIterations,), else None
    :raises ValueError: If sequenceLength is not positive
    """

    # A default written in the signature would be created only once and
    # then shared (together with its internal state) by every call which
    # relies on it, so the default optimizer is created per call instead
    if optimizer is None:
        optimizer = tf.optimizers.Adam()

    # A non-positive length would never advance the sequence start time
    if sequenceLength <= 0:
        raise ValueError('sequenceLength must be positive')

    logger = GlobalLogger.getLogger()
    verbose = ConsoleLogger(verboseLevel)

    assert (Utility.isExoShapeValid(exogenousSeries, self.inputDimension - 1))
    X, Y = Utility.prepareDataTrain(targetSeries, exogenousSeries,
                                    self.forecastHorizon)

    n = X.shape[0]
    logger.log(f'Seq Start Time: {self.windowSize}, Train len: {n}', 2,
               self.train.__name__)

    # Guarantees at least one sequence per iteration, so the average loss
    # below never divides by zero
    assert (self.windowSize < n)

    logger.log('Begin Training', 1, self.train.__name__)

    losses = []
    for iteration in range(numIterations):

        verbose.log(f'begin iteration {iteration}', 1)

        seqStartTime = self.windowSize
        cumulIterLoss = 0.0
        numSeq = 0

        iterStartTime = time.time()
        while seqStartTime < n:
            # The last sequence is cut off at the final timestep
            seqEndTime = min(seqStartTime + sequenceLength, n - 1)

            startTime = time.time()
            loss = self.trainSequence(X, Y, seqStartTime, seqEndTime,
                                      optimizer)
            endTime = time.time()
            timeTaken = endTime - startTime

            cumulIterLoss += loss
            numSeq += 1

            verbose.log(
                f'start timestep: {seqStartTime}'
                + f' | end timestep: {seqEndTime}'
                + f' | time taken: {timeTaken : .2f} sec'
                + f' | Loss: {loss}',
                2
            )

            seqStartTime += sequenceLength

        iterEndTime = time.time()
        iterTimeTaken = iterEndTime - iterStartTime
        avgIterLoss = cumulIterLoss / numSeq

        verbose.log(
            f'Completed Iteration: {iteration}'
            + f' | time taken: {iterTimeTaken : .2f} sec'
            + f' | Avg Iteration Loss: {avgIterLoss}',
            1
        )

        if returnLosses:
            losses.append(avgIterLoss)

        if modelSavePath is not None:
            logger.log(f'Saving Model at {modelSavePath}', 1,
                       self.train.__name__)
            self.save(modelSavePath)

    self.buildMemory(X, n)

    if returnLosses:
        return np.array(losses)
    return None
def test_isExoShapeValid(exogenousSeries, numExoVariables, isValid):
    """
    Tests Utility.isExoShapeValid

    :param exogenousSeries: Exogenous series handed to the check
    :param numExoVariables: Number of exogenous variables handed to the check
    :param isValid: The outcome the check is expected to give
    """

    outcome = Utility.isExoShapeValid(exogenousSeries, numExoVariables)
    assert outcome == isValid