def plotOrigVsDetrend():
    data = constructData()
    # Original time series, excluding the final 365 days (the 2014 test year)
    origY = data[1][0:len(data[1])-365]
    # Detrended time series over the same window
    indices = np.arange(len(data[1])-365)
    detrendY = statistics.detrend(indices, data[1][0:len(data[1])-365])[0]
    # comparisonPlot takes a list of series and a parallel list of names
    visualizer.comparisonPlot(2009, 1, 1, [origY, detrendY], ["Original", "Detrended"],
        plotName="Aggregate Electric Load : Original & Detrended",
        yAxisName="Kilowatts")
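# ----------------------------------------------------------------------
# The `statistics` module used above is not shown in this section. The
# following is a minimal sketch of what its detrend/reapplyTrend pair
# might look like, inferred from the call sites: a least squares line
# fit that returns the residuals plus the fitted slope and intercept.
# The use of np.polyfit is an assumption, not the author's confirmed
# implementation.
# ----------------------------------------------------------------------
import numpy as np

def detrendSketch(indices, y):
    # Fit a least squares regression line to (index, value) pairs and
    # return (residuals, slope, intercept), mirroring statistics.detrend.
    slope, intercept = np.polyfit(indices, y, 1)
    residuals = [y[i] - (slope * indices[i] + intercept) for i in range(len(y))]
    return residuals, slope, intercept

def reapplyTrendSketch(indices, residuals, slope, intercept):
    # Invert detrendSketch by adding the fitted line back onto the
    # residuals, mirroring statistics.reapplyTrend.
    return [residuals[i] + (slope * indices[i] + intercept)
            for i in range(len(residuals))]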
def plotDetrended():
    data = constructData()
    # Plot of the data after detrending with least squares regression
    indices = np.arange(len(data[1]))
    detrendY = statistics.detrend(indices, data[1])[0]
    visualizer.yearlyPlot(detrendY, 2009, 1, 1,
        "Detrended Aggregate Electricity Demand", "Residual Kilowatts")
def plotOriginal():
    data = constructData()
    # Plot of aggregate electricity demand over the past 5 years
    section = data[1][0:len(data[1])-365]
    visualizer.yearlyPlot(section, 2009, 1, 1,
        "Average Total Electricity Load : 2009-2013", "Kilowatts")
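# ----------------------------------------------------------------------
# The `visualizer` module is likewise not shown here. Hedged matplotlib
# sketches of its two most-used helpers, yearlyPlot and comparisonPlot,
# inferred from the call sites: both index a daily series against
# calendar dates starting at (year, month, day). Figure styling details
# are assumptions.
# ----------------------------------------------------------------------
import datetime
import matplotlib.pyplot as plt

def yearlyPlotSketch(series, year, month, day, plotName, yAxisName):
    # Plot one daily series against calendar dates.
    start = datetime.date(year, month, day)
    dates = [start + datetime.timedelta(days=i) for i in range(len(series))]
    plt.plot(dates, series)
    plt.title(plotName)
    plt.ylabel(yAxisName)
    plt.show()

def comparisonPlotSketch(year, month, day, seriesList, names, plotName, yAxisName):
    # Overlay several daily series (e.g. model predictions vs. actuals)
    # on a shared date axis, one labeled line per series.
    start = datetime.date(year, month, day)
    for series, name in zip(seriesList, names):
        dates = [start + datetime.timedelta(days=i) for i in range(len(series))]
        plt.plot(dates, series, label=name)
    plt.title(plotName)
    plt.ylabel(yAxisName)
    plt.legend()
    plt.show()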
def suppVectorRegress():
    kernelList = ["linear", "rbf", polyKernel]
    names = ["linear", "radial basis", "poly"]
    preds = []
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    for gen in range(len(kernelList)):
        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain, yTrain, xTest, kernelList[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print "The Normalized Root-Mean Square Error is " + str(err) + " using kernel " + names[gen] + "..."
        preds.append(trendedPred)
    names.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(2014, 1, 1, preds, names,
        plotName="Support Vector Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
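# ----------------------------------------------------------------------
# Two more `statistics` helpers are used by every model in this file.
# Hedged sketches, assuming estimateMissing replaces sentinel entries
# in-place with the mean of their neighbors within each feature vector,
# and normRmse is the RMSE normalized by the range of the actual values.
# Both are inferences from the comments and call sites, not the module
# itself.
# ----------------------------------------------------------------------
import math

def estimateMissingSketch(xMatrix, missingVal):
    # Replace entries equal to missingVal, in-place, with the average of
    # the adjacent entries in the same feature vector (falling back to a
    # single neighbor at the ends).
    for vec in xMatrix:
        for j in range(len(vec)):
            if vec[j] == missingVal:
                left = vec[j - 1] if j > 0 else vec[j + 1]
                right = vec[j + 1] if j < len(vec) - 1 else vec[j - 1]
                vec[j] = (left + right) / 2.0

def normRmseSketch(yActual, yPred):
    # Root-mean-square error divided by the range of the actual values,
    # making errors comparable across differently scaled series.
    n = len(yActual)
    mse = sum((yActual[i] - yPred[i]) ** 2 for i in range(n)) / float(n)
    return math.sqrt(mse) / (max(yActual) - min(yActual))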
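# ----------------------------------------------------------------------
# svrPredictions and polyKernel are not defined in this section. A
# plausible sketch with scikit-learn's SVR, which accepts both named
# kernels ("linear", "rbf") and a callable that returns a Gram matrix.
# The polynomial degree and the C hyperparameter are illustrative
# assumptions.
# ----------------------------------------------------------------------
import numpy as np
from sklearn.svm import SVR

def polyKernelSketch(X, Y):
    # Callable kernel: inhomogeneous polynomial kernel of degree 2,
    # returning the Gram matrix of X against Y.
    return (np.dot(X, np.transpose(Y)) + 1.0) ** 2

def svrPredictionsSketch(xTrain, yTrain, xTest, kernel):
    # kernel is either a string or a callable such as polyKernelSketch.
    clf = SVR(kernel=kernel, C=2.0)
    clf.fit(xTrain, yTrain)
    return clf.predict(xTest)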
def neuralNetwork():
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Network configurations: d = PCA dimensions, h = hidden neurons
    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Append computed predictions to list for classifier predictions
        preds.append(trendedPred)
        print "The NRMSE for the neural network is " + str(err) + "..."
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
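# ----------------------------------------------------------------------
# fit_predict(xTrain, yTrain, xTest, 40, neurons) presumably trains a
# single-hidden-layer feedforward network and returns test predictions,
# with the 40 reading as an epoch count. A sketch using PyBrain; the
# choice of library, learning settings, and topology are assumptions.
# ----------------------------------------------------------------------
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork

def fitPredictSketch(xTrain, yTrain, xTest, epochs, neurons):
    # One hidden layer of `neurons` units, a single regression output.
    net = buildNetwork(len(xTrain[0]), neurons, 1)
    ds = SupervisedDataSet(len(xTrain[0]), 1)
    for i in range(len(xTrain)):
        ds.addSample(xTrain[i], (yTrain[i],))
    # Backpropagation; each call to train() is one pass over the dataset
    trainer = BackpropTrainer(net, ds)
    for _ in range(epochs):
        trainer.train()
    # Activate the trained network on each test vector
    return [net.activate(x)[0] for x in xTest]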
def plotPeriodogram():
    data = constructData()
    visualizer.periodogramPlot(data[1][len(data[1])-730:len(data[1])-365],
        "Periodogram of Average Total Electricity Load : 2013")
def plotLag():
    data = constructData()
    visualizer.lagPlot(data[1][0:len(data[1])-365],
        "Average Total Electricity Load Lag : 2009-2013")
def plotCorrelogram():
    data = constructData()
    visualizer.autoCorrPlot(data[1][len(data[1])-730:len(data[1])-365],
        "Average Total Electricity Load Autocorrelations : 2013")
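# ----------------------------------------------------------------------
# Hedged sketches of the three diagnostic plots used above. The real
# visualizer module is not shown; these assume a scipy periodogram, a
# lag-1 scatter plot, and matplotlib's autocorrelation plot.
# ----------------------------------------------------------------------
import matplotlib.pyplot as plt
from scipy import signal

def periodogramPlotSketch(series, plotName):
    # Power spectral density estimate; peaks reveal dominant periods
    # (e.g. the weekly cycle in daily load data).
    freqs, power = signal.periodogram(series)
    plt.plot(freqs, power)
    plt.title(plotName)
    plt.xlabel("Frequency (cycles/day)")
    plt.show()

def lagPlotSketch(series, plotName):
    # Scatter each observation against its predecessor; structure here
    # indicates autocorrelation worth modeling.
    plt.scatter(series[:-1], series[1:])
    plt.title(plotName)
    plt.xlabel("y(t)")
    plt.ylabel("y(t+1)")
    plt.show()

def autoCorrPlotSketch(series, plotName):
    # Autocorrelation of the mean-centered series across all lags.
    mean = sum(series) / float(len(series))
    plt.acorr([x - mean for x in series], maxlags=None)
    plt.title(plotName)
    plt.show()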
def clustering():
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Compute centroids and labels of the training data
    cward_7, lward_7 = hierarchicalClustering(xTrain, 7)
    cward_365, lward_365 = hierarchicalClustering(xTrain, 365)
    ckmeans_7, lkmeans_7 = kMeansClustering(xTrain, 7)
    ckmeans_365, lkmeans_365 = kMeansClustering(xTrain, 365)
    c = [cward_7, cward_365, ckmeans_7, ckmeans_365]
    l = [lward_7, lward_365, lkmeans_7, lkmeans_365]
    algNames = ["agglomerative(7)", "agglomerative(365)",
                "k-means(7)", "k-means(365)"]
    preds = []
    for t in range(len(c)):
        # The centroids computed by the current clustering algorithm
        centroids = c[t]
        # The labels for the examples defined by the current clustering assignment
        labels = l[t]
        # Separate the training samples into cluster sets
        clusterSets = []
        for x in range(len(centroids)):
            clusterSets.append([])
        for x in range(len(labels)):
            # Place the example into its cluster
            clusterSets[labels[x]].append((xTrain[x], yTrain[x]))
        # Compute predictions for each of the test examples
        pred = predictClustering(centroids, clusterSets, xTest, "euclidean")
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Add to list of predictions
        preds.append(trendedPred)
        print "The Normalized Root-Mean Square Error is " + str(err) + " using algorithm " + algNames[t] + "..."
    algNames.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(2014, 1, 1, preds, algNames,
        plotName="Clustering Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
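# ----------------------------------------------------------------------
# The clustering helpers are assumed to return (centroids, labels) and
# to predict by nearest-centroid lookup. A hedged scikit-learn sketch;
# computing agglomerative "centroids" as cluster means and predicting
# the average target of the matched cluster are both assumptions.
# ----------------------------------------------------------------------
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.cluster import AgglomerativeClustering, KMeans

def kMeansClusteringSketch(xTrain, k):
    km = KMeans(n_clusters=k).fit(xTrain)
    return km.cluster_centers_, km.labels_

def hierarchicalClusteringSketch(xTrain, k):
    # Ward-linkage agglomerative clustering has no native centroids, so
    # take the mean of each cluster's members.
    labels = AgglomerativeClustering(n_clusters=k, linkage="ward").fit_predict(xTrain)
    X = np.array(xTrain)
    centroids = np.array([X[labels == i].mean(axis=0) for i in range(k)])
    return centroids, labels

def predictClusteringSketch(centroids, clusterSets, xTest, metric):
    # Assign each test vector to its nearest centroid, then predict the
    # mean target value of that cluster's training examples.
    dists = cdist(xTest, centroids, metric=metric)
    preds = []
    for row in dists:
        members = clusterSets[int(np.argmin(row))]
        preds.append(sum(y for _, y in members) / float(len(members)))
    return preds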
def gaussianProcesses():
    corrMods = ['cubic', 'squared_exponential', 'absolute_exponential', 'linear']
    preds = []
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    for gen in range(len(corrMods)):
        # Use GPR to predict test observations based upon training observations
        pred = gaussProcPred(xTrain, yTrain, xTest, corrMods[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print "The Normalized Root-Mean Square Error is " + str(err) + " using covariance function " + corrMods[gen] + "..."
        preds.append(trendedPred)
    # yTest was never transformed, so it can be plotted directly
    corrMods.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(2014, 1, 1, preds, corrMods,
        plotName="Gaussian Process Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
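# ----------------------------------------------------------------------
# The corrMods strings match the correlation models of scikit-learn's
# legacy GaussianProcess estimator (removed in 0.18 in favor of
# GaussianProcessRegressor), so gaussProcPred was presumably a thin
# wrapper around it. A sketch under that assumption; the theta0 and
# nugget values are illustrative.
# ----------------------------------------------------------------------
import numpy as np
from sklearn.gaussian_process import GaussianProcess  # legacy API, sklearn < 0.18

def gaussProcPredSketch(xTrain, yTrain, xTest, corr):
    # corr is one of 'cubic', 'squared_exponential',
    # 'absolute_exponential', or 'linear'
    gp = GaussianProcess(corr=corr, theta0=1e-2, nugget=1e-8)
    gp.fit(np.array(xTrain), np.array(yTrain))
    return gp.predict(np.array(xTest))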